From ca9112a424ff68ec4f2ef67b47122f7d61412964 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 15 Aug 2016 13:49:49 -0400
Subject: Stamp HEAD as 10devel.
This is a good bit more complicated than the average new-version stamping
commit, because it includes various adjustments in pursuit of changing
from three-part to two-part version numbers. It's likely some further
work will be needed around that change; but this is enough to get through
the regression tests, at least in Unix builds.
Peter Eisentraut and Tom Lane
---
doc/src/sgml/runtime.sgml | 31 ++++++++++++++++++++-----------
1 file changed, 20 insertions(+), 11 deletions(-)
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 8ba95e1b84..66fbe441ac 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1601,17 +1601,26 @@ $ kill -INT `head -1 /usr/local/pgsql/data/postmaster.pid`
- PostgreSQL> major versions are represented by the
- first two digit groups of the version number, e.g., 8.4.
- PostgreSQL> minor versions are represented by the
- third group of version digits, e.g., 8.4.2 is the second minor
- release of 8.4. Minor releases never change the internal storage
- format and are always compatible with earlier and later minor
- releases of the same major version number, e.g., 8.4.2 is compatible
- with 8.4, 8.4.1 and 8.4.6. To update between compatible versions,
- you simply replace the executables while the server is down and
- restart the server. The data directory remains unchanged —
- minor upgrades are that simple.
+ Current PostgreSQL version numbers consist of a
+ major and a minor version number. For example, in the version number 10.1,
+ the 10 is the major version number and the 1 is the minor version number,
+ meaning this would be the first minor release of the major release 10. For
+ releases before PostgreSQL version 10.0, version
+ numbers consist of three numbers, for example, 9.5.3. In those cases, the
+ major version consists of the first two digit groups of the version number,
+ e.g., 9.5, and the minor version is the third number, e.g., 3, meaning this
+ would be the third minor release of the major release 9.5.
+
+
+
+ Minor releases never change the internal storage format and are always
+ compatible with earlier and later minor releases of the same major version
+ number. For example, version 10.1 is compatible with version 10.0 and
+ version 10.6. Similarly, for example, 9.5.3 is compatible with 9.5.0,
+ 9.5.1, and 9.5.6. To update between compatible versions, you simply
+ replace the executables while the server is down and restart the server.
+ The data directory remains unchanged — minor upgrades are that
+ simple.
--
cgit v1.2.3
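(Editor's aside, not part of the patch: the numbering change described above can be inspected from SQL. A minimal sketch; the outputs assume servers of the stated versions.)

-- On a post-change server:
SHOW server_version;      -- '10.1'   (major 10, minor 1)
SHOW server_version_num;  -- '100001' (major * 10000 + minor)
-- On a pre-10 server such as 9.5.3:
--   server_version     -> '9.5.3'
--   server_version_num -> '90503'  (major 9.5, minor 3)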
From 8fc571b7dd9fa1659536a26bb085584b50a65a51 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 16 Aug 2016 10:59:14 -0400
Subject: Doc: remove out-of-date claim that pg_am rows must be inserted by
hand.
Commit 473b93287 added a sentence about that, but neglected to remove
the adjacent sentence it had falsified. Per Alexander Law.
---
doc/src/sgml/indexam.sgml | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index b59cd0363a..40f201b11b 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -51,16 +51,9 @@
pg_am
system catalog. The pg_am entry
specifies a name and a handler function> for the access
- method. There is not currently any special support
- for creating or deleting pg_am entries;
- anyone able to write a new access method is expected to be competent
- to insert an appropriate row for themselves.
-
-
-
- Index access methods can be defined and dropped using
+ method. These entries can be created and deleted using the
and
- SQL commands respectively.
+ SQL commands.
--
cgit v1.2.3
From 9b002cc9fec557fcfe17d67f55b53804447230e5 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 16 Aug 2016 11:35:36 -0400
Subject: Doc: copy-editing in create_access_method.sgml.
Improve shaky English grammar. And markup.
---
doc/src/sgml/ref/create_access_method.sgml | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml
index 3c091f8021..0a30e6ea3c 100644
--- a/doc/src/sgml/ref/create_access_method.sgml
+++ b/doc/src/sgml/ref/create_access_method.sgml
@@ -57,29 +57,28 @@ CREATE ACCESS METHOD name
- access_method_type
+ access_method_type
- This clause specifies type of access method to define.
+ This clause specifies the type of access method to define.
Only INDEX is supported at present.
- HANDLER handler_function
+ handler_function
- handler_function is the
- name of a previously registered function that will be called to
- retrieve the struct which contains required parameters and functions
- of access method to the core. The handler function must take single
- argument of type internal>, and its return type depends on the
- type of access method; for INDEX access methods, it
- must be index_am_handler.
-
-
- See for index access methods API.
+ handler_function is the
+ name (possibly schema-qualified) of a previously registered function
+ that represents the access method. The handler function must be
+ declared to take a single argument of type internal>,
+ and its return type depends on the type of access method;
+ for INDEX access methods, it must
+ be index_am_handler. The C-level API that the handler
+ function must implement varies depending on the type of access method.
+ The index access method API is described in .
@@ -90,7 +89,7 @@ CREATE ACCESS METHOD nameExamples
- Create an access method heptree> with
+ Create an index access method heptree> with
handler function heptree_handler>:
CREATE ACCESS METHOD heptree TYPE INDEX HANDLER heptree_handler;
--
cgit v1.2.3
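(Editor's aside: a minimal sketch of the full lifecycle the revised page describes, reusing the page's hypothetical heptree names; the AS clause arguments are placeholders for a real shared library and symbol, as the doc change says the handler must take internal and return index_am_handler.)

CREATE FUNCTION heptree_handler(internal)
    RETURNS index_am_handler
    AS 'heptree', 'heptree_handler' LANGUAGE C STRICT;
CREATE ACCESS METHOD heptree TYPE INDEX HANDLER heptree_handler;
DROP ACCESS METHOD heptree;  -- the pg_am entry is removed the same way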
From f0fe1c8f70bacb65513f1cbaea14eb384d346ee8 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 16 Aug 2016 12:00:00 -0400
Subject: Fix typos
From: Alexander Law
---
doc/src/sgml/release-9.6.sgml | 2 +-
doc/src/sgml/runtime.sgml | 2 +-
src/backend/access/transam/multixact.c | 2 +-
src/backend/utils/adt/tsquery.c | 2 +-
src/test/regress/expected/privileges.out | 2 +-
src/test/regress/sql/privileges.sql | 2 +-
6 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index cc886fa2bb..9003b1f6e4 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -1026,7 +1026,7 @@ This commit is also listed under libpq and psql
This view exposes the same information available from
- the the pg_config> comand-line utility,
+ the pg_config> comand-line utility,
namely assorted compile-time configuration information for
PostgreSQL>.
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 66fbe441ac..60a06590fe 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -184,7 +184,7 @@ postgres$ initdb -D /usr/local/pgsql/data
- Non-C> and and non-POSIX> locales rely on the
+ Non-C> and non-POSIX> locales rely on the
operating system's collation library for character set ordering.
This controls the ordering of keys stored in indexes. For this reason,
a cluster cannot switch to an incompatible collation library version,
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index c2e4fa377d..0c8c17af33 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2802,7 +2802,7 @@ ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
* more aggressive in clamping this value. That not only causes autovacuum
* to ramp up, but also makes any manual vacuums the user issues more
* aggressive. This happens because vacuum_set_xid_limits() clamps the
- * freeze table and and the minimum freeze age based on the effective
+ * freeze table and the minimum freeze age based on the effective
* autovacuum_multixact_freeze_max_age this function returns. In the worst
* case, we'll claim the freeze_max_age to zero, and every vacuum of any
* table will try to freeze every multixact.
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index c0a4a0606b..3d11a1c208 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -691,7 +691,7 @@ parse_tsquery(char *buf,
findoprnd(ptr, query->size, &needcleanup);
/*
- * QI_VALSTOP nodes should be cleaned and and OP_PHRASE should be pushed
+ * QI_VALSTOP nodes should be cleaned and OP_PHRASE should be pushed
* down
*/
if (needcleanup)
diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
index 996ebcdca2..f66b4432a1 100644
--- a/src/test/regress/expected/privileges.out
+++ b/src/test/regress/expected/privileges.out
@@ -390,7 +390,7 @@ INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set one = 8; -- f
ERROR: permission denied for relation atest5
INSERT INTO atest5(three) VALUES (4) ON CONFLICT (two) DO UPDATE set three = 10; -- fails (due to INSERT)
ERROR: permission denied for relation atest5
--- Check that the the columns in the inference require select privileges
+-- Check that the columns in the inference require select privileges
-- Error. No privs on four
INSERT INTO atest5(three) VALUES (4) ON CONFLICT (four) DO UPDATE set three = 10;
ERROR: permission denied for relation atest5
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
index 0aa9c672d5..00dc7bd4ab 100644
--- a/src/test/regress/sql/privileges.sql
+++ b/src/test/regress/sql/privileges.sql
@@ -259,7 +259,7 @@ INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLU
INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLUDED.three;
INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set one = 8; -- fails (due to UPDATE)
INSERT INTO atest5(three) VALUES (4) ON CONFLICT (two) DO UPDATE set three = 10; -- fails (due to INSERT)
--- Check that the the columns in the inference require select privileges
+-- Check that the columns in the inference require select privileges
-- Error. No privs on four
INSERT INTO atest5(three) VALUES (4) ON CONFLICT (four) DO UPDATE set three = 10;
--
cgit v1.2.3
From 639166641102871e09f9c4aebc71df57566a0a4a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 16 Aug 2016 12:00:00 -0400
Subject: doc: Remove some confusion from pg_archivecleanup doc
From: Jeff Janes
---
doc/src/sgml/ref/pgarchivecleanup.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/src/sgml/ref/pgarchivecleanup.sgml b/doc/src/sgml/ref/pgarchivecleanup.sgml
index 60a7fc4e6b..abe01bef4f 100644
--- a/doc/src/sgml/ref/pgarchivecleanup.sgml
+++ b/doc/src/sgml/ref/pgarchivecleanup.sgml
@@ -122,7 +122,7 @@ pg_archivecleanup: removing file "archive/00000001000000370000000E"
extension>
- When using the program as a standalone utility, provide an extension
+ Provide an extension
that will be stripped from all file names before deciding if they
should be deleted. This is typically useful for cleaning up archives
that have been compressed during storage, and therefore have had an
--
cgit v1.2.3
From d125d25790901683d5ad16bfc96e9de4ccc9a581 Mon Sep 17 00:00:00 2001
From: Bruce Momjian
Date: Tue, 16 Aug 2016 23:04:50 -0400
Subject: docs: my third pass over the 9.6 release notes
Backpatch-through: 9.6
---
doc/src/sgml/release-9.6.sgml | 628 ++++++++++++++++++++++--------------------
1 file changed, 332 insertions(+), 296 deletions(-)
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 9003b1f6e4..8d7356e27f 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -401,7 +401,7 @@ This commit is also listed under libpq and psql
Allow GIN>> index builds to
make effective use of
- settings larger than 1GB (Robert Abraham, Teodor Sigaev)
+ settings larger than 1 GB (Robert Abraham, Teodor Sigaev)
@@ -467,55 +467,130 @@ This commit is also listed under libpq and psql
- General Performance
+ Sorting
- Avoid re-vacuuming pages containing only frozen tuples (Masahiko
- Sawada, Robert Haas, Andres Freund)
+ Improve sorting performance by using quicksort, not replacement
+ selection sort, when performing external sort steps (Peter
+ Geoghegan)
- Formerly, anti-wraparound vacuum had to visit every page of
- a table, even pages where there was nothing to do. Now, pages
- containing only already-frozen tuples are identified in the table's
- visibility map, and can be skipped by vacuum even when doing
- transaction wraparound prevention. This should greatly reduce the
- cost of maintaining large tables containing mostly-unchanged data.
+ The new approach makes better use of the CPU> cache
+ for typical cache sizes and data volumes. Where necessary,
+ the behavior can be adjusted via the new configuration parameter
+ .
+
+
+
- If necessary, vacuum can be forced to process all-frozen
- pages using the new DISABLE_PAGE_SKIPPING> option.
- Normally, this should never be needed but it might help in
- recovering from visibility-map corruption.
+ Speed up text sorts where the same string occurs multiple times
+ (Peter Geoghegan)
- Avoid useless heap-truncation attempts during VACUUM>
- (Jeff Janes, Tom Lane)
+ Speed up sorting of uuid>, bytea>, and
+ char(n)> fields by using abbreviated> keys
+ (Peter Geoghegan)
- This change avoids taking an exclusive table lock in some cases
- where no truncation is possible. The main benefit comes from
- avoiding unnecessary query cancellations on standby servers.
+ Support for abbreviated keys has also been
+ added to the non-default operator classes text_pattern_ops>>,
+ varchar_pattern_ops>, and
+ bpchar_pattern_ops>. Processing of ordered-set
+ aggregates can also now exploit abbreviated keys.
+
+
+
+
+
+
+ Speed up CREATE INDEX CONCURRENTLY> by treating
+ TID>s as 64-bit integers during sorting (Peter
+ Geoghegan)
+
+
+
+
+
+
+
+
+ Locking
+
+
+
+
+
+
+ Reduce contention for the ProcArrayLock> (Amit Kapila,
+ Robert Haas)
+
+
+
+
+
+
+ Improve performance by moving buffer content locks into the buffer
+ descriptors (Andres Freund, Simon Riggs)
+
+
+
+
+
+
+ Replace shared-buffer header spinlocks with atomic operations to
+ improve scalability (Alexander Korotkov, Andres Freund)
+
+
+
+
+
+
+ Use atomic operations, rather than a spinlock, to protect an
+ LWLock>'s wait queue (Andres Freund)
+
+
+
+
+
+
+ Partition the shared hash table freelist to reduce contention on
+ multi-CPU>-socket servers (Aleksander Alekseev)
@@ -531,111 +606,138 @@ This commit is also listed under libpq and psql
This change avoids substantial replication delays that sometimes
- occurre while replaying such operations.
+ occurred while replaying such operations.
+
+
+
+
+
+ Optimizer Statistics
+
+
+
- Avoid computing GROUP BY> columns if they are
- functionally dependent on other columns (David Rowley)
+ Improve ANALYZE>'s estimates for columns with many nulls
+ (Tomas Vondra, Alex Shulgin)
- If a GROUP BY> clause includes all columns of a
- non-deferred primary key, as well as other columns of the same
- table, those other columns are redundant and can be dropped
- from the grouping. This saves computation in many common cases.
+ Previously ANALYZE> tended to underestimate the number
+ of non-NULL> distinct values in a column with many
+ NULL>s, and was also inaccurate in computing the
+ most-common values.
- When appropriate, postpone evaluation of SELECT>
- output expressions until after an ORDER BY> sort
- (Konstantin Knizhnik)
-
-
-
- This change ensures that volatile or expensive functions in the
- output list are executed in the order suggested by ORDER
- BY>, and that they are not evaluated more times than required
- when there is a LIMIT> clause. Previously, these
- properties held if the ordering was performed by an index scan or
- pre-merge-join sort, but not if it was performed by a top-level
- sort.
+ Improve planner's estimate of the number of distinct values in
+ a query result (Tomas Vondra)
- Where feasible, trigger kernel writeback after a configurable
- number of writes, to prevent accumulation of dirty data in kernel
- disk buffers (Fabien Coelho, Andres Freund)
+ Use foreign key relationships to infer selectivity for join
+ predicates (Tomas Vondra, David Rowley)
- PostgreSQL> writes data to the kernel's disk cache,
- from where it will be flushed to physical storage in due time.
- Many operating systems are not smart about managing this and allow
- large amounts of dirty data to accumulate before deciding to flush
- it all at once, leading to long delays for new I/O requests.
- This change attempts to alleviate this problem by explicitly
- requesting data flushes after a configurable interval.
+ If a table t> has a foreign key restriction, say
+ (a,b) REFERENCES r (x,y)>, then a WHERE>
+ condition such as t.a = r.x AND t.b = r.y> cannot
+ select more than one r> row per t> row.
+ The planner formerly considered AND> conditions
+ to be independent and would often drastically misestimate
+ selectivity as a result. Now it compares the WHERE>
+ conditions to applicable foreign key constraints and produces
+ better estimates.
+
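(Editor's sketch of the shape this item describes; t and r are the hypothetical tables from the text.)

-- t(a,b) REFERENCES r(x,y): the planner now recognizes that this join
-- clause can match at most one r row per t row, instead of treating the
-- two equality conditions as independent filters.
SELECT *
FROM t JOIN r ON t.a = r.x AND t.b = r.y;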
+
+
+
+
+
+
+ VACUUM>
+
+
+
+
- On Linux, sync_file_range()> is used for this purpose,
- and the feature is on by default on Linux because that function has few
- downsides. This sync capability is also available on other platforms
- that have msync()> or posix_fadvise()>,
- but those interfaces have some undesirable side-effects so the
- feature is disabled by default on non-Linux platforms.
+ Avoid re-vacuuming pages containing only frozen tuples (Masahiko
+ Sawada, Robert Haas, Andres Freund)
- The new configuration parameters , , , and control this behavior.
+ Formerly, anti-wraparound vacuum had to visit every page of
+ a table, even pages where there was nothing to do. Now, pages
+ containing only already-frozen tuples are identified in the table's
+ visibility map, and can be skipped by vacuum even when doing
+ transaction wraparound prevention. This should greatly reduce the
+ cost of maintaining large tables containing mostly-unchanged data.
+
+
+
+ If necessary, vacuum can be forced to process all-frozen
+ pages using the new DISABLE_PAGE_SKIPPING> option.
+ Normally, this should never be needed but it might help in
+ recovering from visibility-map corruption.
- Perform checkpoint writes in sorted order (Fabien Coelho,
- Andres Freund)
+ Avoid useless heap-truncation attempts during VACUUM>
+ (Jeff Janes, Tom Lane)
- Previously, checkpoints wrote out dirty pages in whatever order
- they happen to appear in shared buffers, which usually is nearly
- random. That performs poorly, especially on rotating media.
- This change causes checkpoint-driven writes to be done in order
- by file and block number, and to be balanced across tablespaces.
+ This change avoids taking an exclusive table lock in some cases
+ where no truncation is possible. The main benefit comes from
+ avoiding unnecessary query cancellations on standby servers.
+
+
+
+
+
+ General Performance
+
+
+
+
+ Avoid computing GROUP BY> columns if they are
+ functionally dependent on other columns (David Rowley)
+
+
+
+ If a GROUP BY> clause includes all columns of a
+ non-deferred primary key, as well as other columns of the same
+ table, those other columns are redundant and can be dropped
+ from the grouping. This saves computation in many common cases.
+
+
+
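(Editor's sketch, assuming a hypothetical table with a single-column primary key.)

CREATE TABLE t (id int PRIMARY KEY, label text);
-- 'label' is functionally dependent on the primary key 'id', so the
-- planner can drop it from the grouping computation.
SELECT id, label, count(*) FROM t GROUP BY id, label;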
+
+
+
+ When appropriate, postpone evaluation of SELECT>
+ output expressions until after an ORDER BY> sort
+ (Konstantin Knizhnik)
+
+
+
+ This change ensures that volatile or expensive functions in the
+ output list are executed in the order suggested by ORDER
+ BY>, and that they are not evaluated more times than required
+ when there is a LIMIT> clause. Previously, these
+ properties held if the ordering was performed by an index scan or
+ pre-merge-join sort, but not if it was performed by a top-level
+ sort.
+
+
+
+
+
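(Editor's sketch; expensive_fn and the table columns are hypothetical.)

-- With this change, expensive_fn is evaluated only for the 10 rows that
-- survive the LIMIT, and in the output order, rather than once per input row.
SELECT id, expensive_fn(payload)
FROM events
ORDER BY created_at
LIMIT 10;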
@@ -687,25 +828,63 @@ This commit is also listed under libpq and psql
- Use foreign key relationships to infer selectivity for join
- predicates (Tomas Vondra, David Rowley)
+ Perform checkpoint writes in sorted order (Fabien Coelho,
+ Andres Freund)
- If a table t> has a foreign key restriction, say
- (a,b) REFERENCES r (x,y)>, then a WHERE>
- condition such as t.a = r.x AND t.b = r.y> cannot
- select more than one r> row per t> row.
- The planner formerly considered AND> conditions
- to be independent and would often drastically misestimate
- selectivity as a result. Now it compares the WHERE>
- conditions to applicable foreign key constraints and produces
- better estimates.
+ Previously, checkpoints wrote out dirty pages in whatever order
+ they happen to appear in shared buffers, which usually is nearly
+ random. That performs poorly, especially on rotating media.
+ This change causes checkpoint-driven writes to be done in order
+ by file and block number, and to be balanced across tablespaces.
+
+
+
+
+
+
+ Where feasible, trigger kernel writeback after a configurable
+ number of writes, to prevent accumulation of dirty data in kernel
+ disk buffers (Fabien Coelho, Andres Freund)
+
+
+
+ PostgreSQL> writes data to the kernel's disk cache,
+ from where it will be flushed to physical storage in due time.
+ Many operating systems are not smart about managing this and allow
+ large amounts of dirty data to accumulate before deciding to flush
+ it all at once, leading to long delays for new I/O requests.
+ This change attempts to alleviate this problem by explicitly
+ requesting data flushes after a configurable interval.
+
+
+
+ On Linux, sync_file_range()> is used for this purpose,
+ and the feature is on by default on Linux because that function has few
+ downsides. This sync capability is also available on other platforms
+ that have msync()> or posix_fadvise()>,
+ but those interfaces have some undesirable side-effects so the
+ feature is disabled by default on non-Linux platforms.
+
+
+
+ The new configuration parameters , , , and control this behavior.
@@ -808,35 +987,6 @@ This commit is also listed under libpq and psql
-
- Improve ANALYZE>'s estimates for columns with many nulls
- (Tomas Vondra, Alex Shulgin)
-
-
-
- Previously ANALYZE> tended to underestimate the number
- of non-NULL> distinct values in a column with many
- NULL>s, and was also inaccurate in computing the
- most-common values.
-
-
-
-
-
-
- Improve planner's estimate of the number of distinct values in
- a query result (Tomas Vondra)
-
-
-
-
-
@@ -851,69 +1001,6 @@ This commit is also listed under libpq and psql
-
- Improve sorting performance by using quicksort, not replacement
- selection sort, when performing external sort steps (Peter
- Geoghegan)
-
-
-
- The new approach makes better use of the CPU> cache
- for typical cache sizes and data volumes. Where necessary,
- the behavior can be adjusted via the new configuration parameter
- .
-
-
-
-
-
-
- Speed up text sorts where the same string occurs multiple times
- (Peter Geoghegan)
-
-
-
-
-
-
- Speed up sorting of uuid>, bytea>, and
- char(n)> fields by using abbreviated> keys
- (Peter Geoghegan)
-
-
-
- Support for abbreviated keys has also been
- added to the non-default operator classes text_pattern_ops>>,
- varchar_pattern_ops>, and
- bpchar_pattern_ops>. Processing of ordered-set
- aggregates can also now exploit abbreviated keys.
-
-
-
-
-
-
- Speed up CREATE INDEX CONCURRENTLY> by treating
- TID>s as 64-bit integers during sorting (Peter
- Geoghegan)
-
-
-
-
-
@@ -924,57 +1011,6 @@ This commit is also listed under libpq and psql
-
- Reduce contention for the ProcArrayLock> (Amit Kapila,
- Robert Haas)
-
-
-
-
-
-
- Improve performance by moving buffer content locks into the buffer
- descriptors (Andres Freund, Simon Riggs)
-
-
-
-
-
-
- Replace shared-buffer header spinlocks with atomic operations to
- improve scalability (Alexander Korotkov, Andres Freund)
-
-
-
-
-
-
- Use atomic operations, rather than a spinlock, to protect an
- LWLock>'s wait queue (Andres Freund)
-
-
-
-
-
-
- Partition the shared hash table freelist to reduce contention on
- multi-CPU>-socket servers (Aleksander Alekseev)
-
-
-
-
-
@@ -1017,6 +1053,21 @@ This commit is also listed under libpq and psql
+
+ Add pg_control_system()>>,
+ pg_control_checkpoint()>,
+ pg_control_recovery()>, and
+ pg_control_init()> functions to expose fields of
+ pg_control> to SQL> (Joe Conway, Michael
+ Paquier)
+
+
+
+
+
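(Editor's aside: the new functions can be called directly; this assumes a 9.6 or later server.)

SELECT * FROM pg_control_system();      -- e.g. pg_control version, system identifier
SELECT * FROM pg_control_checkpoint();  -- fields of the last checkpoint record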
@@ -1082,21 +1133,6 @@ This commit is also listed under libpq and psql
-
- Add pg_control_system()>>,
- pg_control_checkpoint()>,
- pg_control_recovery()>, and
- pg_control_init()> functions to expose fields of
- pg_control> to SQL> (Joe Conway, Michael
- Paquier)
-
-
-
-
-
@@ -1219,18 +1255,18 @@ This commit is also listed under libpq and psql
- Add configure option
- This allows the use of systemd> service units of
- type notify>, which greatly simplifies the management
- of PostgreSQL> under systemd>.
+ This behavior is controlled by the new configuration parameter
+ . It can
+ be useful to prevent forgotten transactions from holding locks
+ or preventing vacuum cleanup for too long.
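(Editor's note: the parameter name was lost in extraction above; in 9.6 it is, to the best of my knowledge, idle_in_transaction_session_timeout.)

-- Assumption: the stripped reference above names this GUC.
SET idle_in_transaction_session_timeout = '10min';
-- Sessions idle inside an open transaction longer than this are terminated.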
@@ -1247,18 +1283,18 @@ This commit is also listed under libpq and psql
- Allow sessions to be terminated automatically if they are in
- idle-in-transaction state for too long (Vik Fearing)
+ Add configure option
- This behavior is controlled by the new configuration parameter
- . It can
- be useful to prevent forgotten transactions from holding locks
- or preventing vacuum cleanup for too long.
+ This allows the use of systemd> service units of
+ type notify>, which greatly simplifies the management
+ of PostgreSQL> under systemd>.
@@ -1857,6 +1893,30 @@ XXX this is pending backpatch, may need to remove
+
+ Improve full-text search to support searching for phrases, that
+ is, lexemes appearing adjacent to each other in a specific order,
+ or with a specified distance between them (Teodor Sigaev, Oleg
+ Bartunov, Dmitry Ivanov)
+
+
+
+ A phrase-search query can be specified in tsquery>
+ input using the new operators <->> and
+ <N>>. The former means
+ that the lexemes before and after it must appear adjacent to
+ each other in that order. The latter means they must be exactly
+ N> lexemes apart.
+
+
+
+
+
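(Editor's sketch of the two operators just described.)

SELECT to_tsvector('english', 'a fatal error occurred')
       @@ to_tsquery('english', 'fatal <-> error');  -- true: lexemes adjacent
SELECT to_tsvector('english', 'a fatal disk error occurred')
       @@ to_tsquery('english', 'fatal <2> error');  -- true: exactly 2 positions apart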
@@ -1917,30 +1977,6 @@ XXX this is pending backpatch, may need to remove
-
- Improve full-text search to support searching for phrases, that
- is, lexemes appearing adjacent to each other in a specific order,
- or with a specified distance between them (Teodor Sigaev, Oleg
- Bartunov, Dmitry Ivanov)
-
-
-
- A phrase-search query can be specified in tsquery>
- input using the new operators <->> and
- <N>>. The former means
- that the lexemes before and after it must appear adjacent to
- each other in that order. The latter means they must be exactly
- N> lexemes apart.
-
-
-
-
-
diff --git a/doc/src/sgml/stylesheet-speedup-xhtml.xsl b/doc/src/sgml/stylesheet-speedup-xhtml.xsl
new file mode 100644
index 0000000000..8428dc5870
--- /dev/null
+++ b/doc/src/sgml/stylesheet-speedup-xhtml.xsl
@@ -0,0 +1,252 @@
+ [252 lines of new XSLT stylesheet; the element markup was lost in
+  extraction. The only surviving literal content is the guard message:
+  "Error: If you change $chunk.section.depth, then you must update the
+  performance-optimized chunk-all-sections-template."]
diff --git a/doc/src/sgml/stylesheet.xsl b/doc/src/sgml/stylesheet.xsl
index 7967b361dd..631fcc4edf 100644
--- a/doc/src/sgml/stylesheet.xsl
+++ b/doc/src/sgml/stylesheet.xsl
@@ -6,6 +6,7 @@
+ [one-line addition lost in extraction]
--
cgit v1.2.3
From 5285c5e873d8b622da7007c1628e5afa80f372fb Mon Sep 17 00:00:00 2001
From: Bruce Momjian
Date: Thu, 18 Aug 2016 21:41:10 -0400
Subject: doc: requirepeer is a way to avoid spoofing
We already mentioned unix_socket_directories as an option.
Reported-by: https://www.postgresql.org/message-id/45016837-6cf3-3136-f959-763d06a28076%402ndquadrant.com
Backpatch-through: 9.6
---
doc/src/sgml/runtime.sgml | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 60a06590fe..98752c2875 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1922,7 +1922,7 @@ pg_dumpall -p 5432 | psql -d postgres -p 5433
- The simplest way to prevent spoofing for local>
+ On way to prevent spoofing of local>
connections is to use a Unix domain socket directory () that has write permission only
for a trusted local user. This prevents a malicious user from creating
@@ -1934,6 +1934,13 @@ pg_dumpall -p 5432 | psql -d postgres -p 5433
/tmp> cleanup script to prevent removal of the symbolic link.
+
+ Another option for local> connections is for clients to use
+ requirepeer>>
+ to specify the required owner of the server process connected to
+ the socket.
+
+
To prevent spoofing on TCP connections, the best solution is to use
SSL certificates and make sure that clients check the server's certificate.
--
cgit v1.2.3
From f2e016f8d55ee029c5d6ec853ff6802aaf49fb0a Mon Sep 17 00:00:00 2001
From: Bruce Momjian
Date: Mon, 22 Aug 2016 17:20:44 -0400
Subject: doc: fix typo in recent patch
Reported-by: Jeff Janes
Backpatch-through: 9.6
---
doc/src/sgml/runtime.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 98752c2875..ef0139c365 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1922,7 +1922,7 @@ pg_dumpall -p 5432 | psql -d postgres -p 5433
- On way to prevent spoofing of local>
+ One way to prevent spoofing of local>
connections is to use a Unix domain socket directory () that has write permission only
for a trusted local user. This prevents a malicious user from creating
--
cgit v1.2.3
From 86f31695f3b54211226949de519063bbf248e8c4 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 23 Aug 2016 10:30:52 -0400
Subject: Add txid_current_ifassigned().
Add a variant of txid_current() that returns NULL if no transaction ID
is assigned. This version can be used even on a standby server,
although it will always return NULL since no transaction IDs can be
assigned during recovery.
Craig Ringer, per suggestion from Jim Nasby. Reviewed by Petr Jelinek
and by me.
---
doc/src/sgml/func.sgml | 9 +++++++++
src/backend/utils/adt/txid.c | 21 +++++++++++++++++++++
src/include/catalog/pg_proc.h | 2 ++
src/include/utils/builtins.h | 1 +
src/test/regress/expected/txid.out | 16 ++++++++++++++++
src/test/regress/sql/txid.sql | 7 +++++++
6 files changed, 56 insertions(+)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 169a385a9c..6355300d9d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -17119,6 +17119,10 @@ SELECT collation for ('foo' COLLATE "de_DE");
txid_current
+
+ txid_current_if_assigned
+
+
txid_current_snapshot
@@ -17159,6 +17163,11 @@ SELECT collation for ('foo' COLLATE "de_DE");
bigintget current transaction ID, assigning a new one if the current transaction does not have one
+
+ txid_current_if_assigned()
+ bigint
+ same as txid_current() but returns null instead of assigning an xid if none is already assigned
+ txid_current_snapshot()txid_snapshot
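(Editor's sketch mirroring the regression test added below.)

BEGIN;
SELECT txid_current_if_assigned();  -- NULL: nothing has assigned an xid yet
SELECT txid_current();              -- forces assignment of an xid
SELECT txid_current_if_assigned();  -- now returns that same xid
COMMIT;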
diff --git a/src/backend/utils/adt/txid.c b/src/backend/utils/adt/txid.c
index c2069a9923..276075e293 100644
--- a/src/backend/utils/adt/txid.c
+++ b/src/backend/utils/adt/txid.c
@@ -376,6 +376,27 @@ txid_current(PG_FUNCTION_ARGS)
PG_RETURN_INT64(val);
}
+/*
+ * Same as txid_current() but doesn't assign a new xid if there isn't one
+ * yet.
+ */
+Datum
+txid_current_if_assigned(PG_FUNCTION_ARGS)
+{
+ txid val;
+ TxidEpoch state;
+ TransactionId topxid = GetTopTransactionIdIfAny();
+
+ if (topxid == InvalidTransactionId)
+ PG_RETURN_NULL();
+
+ load_xid_epoch(&state);
+
+ val = convert_xid(topxid, &state);
+
+ PG_RETURN_INT64(val);
+}
+
/*
* txid_current_snapshot() returns txid_snapshot
*
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 6fed7a0d19..050a98c397 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4904,6 +4904,8 @@ DATA(insert OID = 2942 ( txid_snapshot_send PGNSP PGUID 12 1 0 0 0 f f f f t
DESCR("I/O");
DATA(insert OID = 2943 ( txid_current PGNSP PGUID 12 1 0 0 0 f f f f t f s u 0 0 20 "" _null_ _null_ _null_ _null_ _null_ txid_current _null_ _null_ _null_ ));
DESCR("get current transaction ID");
+DATA(insert OID = 3348 ( txid_current_if_assigned PGNSP PGUID 12 1 0 0 0 f f f f t f s u 0 0 20 "" _null_ _null_ _null_ _null_ _null_ txid_current_if_assigned _null_ _null_ _null_ ));
+DESCR("get current transaction ID");
DATA(insert OID = 2944 ( txid_current_snapshot PGNSP PGUID 12 1 0 0 0 f f f f t f s s 0 0 2970 "" _null_ _null_ _null_ _null_ _null_ txid_current_snapshot _null_ _null_ _null_ ));
DESCR("get current snapshot");
DATA(insert OID = 2945 ( txid_snapshot_xmin PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 20 "2970" _null_ _null_ _null_ _null_ _null_ txid_snapshot_xmin _null_ _null_ _null_ ));
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 40e25c8824..2ae212a9c3 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -1221,6 +1221,7 @@ extern Datum txid_snapshot_out(PG_FUNCTION_ARGS);
extern Datum txid_snapshot_recv(PG_FUNCTION_ARGS);
extern Datum txid_snapshot_send(PG_FUNCTION_ARGS);
extern Datum txid_current(PG_FUNCTION_ARGS);
+extern Datum txid_current_if_assigned(PG_FUNCTION_ARGS);
extern Datum txid_current_snapshot(PG_FUNCTION_ARGS);
extern Datum txid_snapshot_xmin(PG_FUNCTION_ARGS);
extern Datum txid_snapshot_xmax(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out
index ddd217eb10..802ccb949f 100644
--- a/src/test/regress/expected/txid.out
+++ b/src/test/regress/expected/txid.out
@@ -238,3 +238,19 @@ SELECT txid_snapshot '1:9223372036854775808:3';
ERROR: invalid input syntax for type txid_snapshot: "1:9223372036854775808:3"
LINE 1: SELECT txid_snapshot '1:9223372036854775808:3';
^
+-- test txid_current_if_assigned
+BEGIN;
+SELECT txid_current_if_assigned() IS NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+SELECT txid_current() \gset
+SELECT txid_current_if_assigned() IS NOT DISTINCT FROM BIGINT :'txid_current';
+ ?column?
+----------
+ t
+(1 row)
+
+COMMIT;
diff --git a/src/test/regress/sql/txid.sql b/src/test/regress/sql/txid.sql
index b6650b922e..4aefd9e64d 100644
--- a/src/test/regress/sql/txid.sql
+++ b/src/test/regress/sql/txid.sql
@@ -52,3 +52,10 @@ select txid_visible_in_snapshot('1000100010001015', '1000100010001000:1000100010
-- test 64bit overflow
SELECT txid_snapshot '1:9223372036854775807:3';
SELECT txid_snapshot '1:9223372036854775808:3';
+
+-- test txid_current_if_assigned
+BEGIN;
+SELECT txid_current_if_assigned() IS NULL;
+SELECT txid_current() \gset
+SELECT txid_current_if_assigned() IS NOT DISTINCT FROM BIGINT :'txid_current';
+COMMIT;
--
cgit v1.2.3
From d2ddee63b43b27d6c6af169342af10db19bd3a1a Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 23 Aug 2016 12:10:25 -0400
Subject: Improve SP-GiST opclass API to better support unlabeled nodes.
Previously, the spgSplitTuple action could only create a new upper tuple
containing a single labeled node. This made it useless for opclasses
that prefer to work with fixed sets of nodes (labeled or otherwise),
which meant that restrictive prefixes could not be used with such
node definitions. Change the output field set for the choose() method
to allow it to specify any valid node set for the new upper tuple,
and to specify which of these nodes to place the modified lower tuple in.
In addition to its primary use for fixed node sets, this feature could
allow existing opclasses that use variable node sets to skip a separate
spgAddNode action when splitting a tuple, by setting up the node needed
for the incoming value as part of the spgSplitTuple action. However, care
would have to be taken to add the extra node only when it would not make
the tuple bigger than before. (spgAddNode can enlarge the tuple,
spgSplitTuple can't.)
This is a prerequisite for an upcoming SP-GiST inet opclass, but is
being committed separately to increase the visibility of the API change.
In passing, improve the documentation about the traverse-values feature
that was added by commit ccd6eb49a.
Emre Hasegeli, with cosmetic adjustments and documentation rework by me
Discussion:
---
doc/src/sgml/spgist.sgml | 115 ++++++++++++++++++--------------
src/backend/access/spgist/spgdoinsert.c | 39 +++++++++--
src/backend/access/spgist/spgtextproc.c | 12 +++-
src/include/access/spgist.h | 12 ++--
4 files changed, 115 insertions(+), 63 deletions(-)
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index f40c790612..dfa62adb55 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -114,7 +114,7 @@
box_ops>
- box
+ box><<>
&<>
@@ -183,11 +183,14 @@
Inner tuples are more complex, since they are branching points in the
search tree. Each inner tuple contains a set of one or more
nodes>, which represent groups of similar leaf values.
- A node contains a downlink that leads to either another, lower-level inner
- tuple, or a short list of leaf tuples that all lie on the same index page.
- Each node has a label> that describes it; for example,
+ A node contains a downlink that leads either to another, lower-level inner
+ tuple, or to a short list of leaf tuples that all lie on the same index page.
+ Each node normally has a label> that describes it; for example,
in a radix tree the node label could be the next character of the string
- value. Optionally, an inner tuple can have a prefix> value
+ value. (Alternatively, an operator class can omit the node labels, if it
+ works with a fixed set of nodes for all inner tuples;
+ see .)
+ Optionally, an inner tuple can have a prefix> value
that describes all its members. In a radix tree this could be the common
prefix of the represented strings. The prefix value is not necessarily
really a prefix, but can be any data needed by the operator class;
@@ -202,7 +205,8 @@
tuple, so the SP-GiST core provides the possibility for
operator classes to manage level counting while descending the tree.
There is also support for incrementally reconstructing the represented
- value when that is needed.
+ value when that is needed, and for passing down additional data (called
+ traverse values>) during a tree descent.
@@ -343,10 +347,13 @@ typedef struct spgChooseOut
} addNode;
struct /* results for spgSplitTuple */
{
- /* Info to form new inner tuple with one node */
+ /* Info to form new upper-level inner tuple with one child tuple */
bool prefixHasPrefix; /* tuple should have a prefix? */
Datum prefixPrefixDatum; /* if so, its value */
- Datum nodeLabel; /* node's label */
+ int prefixNNodes; /* number of nodes */
+ Datum *prefixNodeLabels; /* their labels (or NULL for
+ * no labels) */
+ int childNodeN; /* which node gets child tuple */
/* Info to form new lower-level inner tuple with all old nodes */
bool postfixHasPrefix; /* tuple should have a prefix? */
@@ -416,29 +423,33 @@ typedef struct spgChooseOut
set resultType> to spgSplitTuple>.
This action moves all the existing nodes into a new lower-level
inner tuple, and replaces the existing inner tuple with a tuple
- having a single node that links to the new lower-level inner tuple.
+ having a single downlink pointing to the new lower-level inner tuple.
Set prefixHasPrefix> to indicate whether the new
upper tuple should have a prefix, and if so set
prefixPrefixDatum> to the prefix value. This new
prefix value must be sufficiently less restrictive than the original
- to accept the new value to be indexed, and it should be no longer
- than the original prefix.
- Set nodeLabel> to the label to be used for the
- node that will point to the new lower-level inner tuple.
+ to accept the new value to be indexed.
+ Set prefixNNodes> to the number of nodes needed in the
+ new tuple, and set prefixNodeLabels> to a palloc'd array
+ holding their labels, or to NULL if node labels are not required.
+ Note that the total size of the new upper tuple must be no more
+ than the total size of the tuple it is replacing; this constrains
+ the lengths of the new prefix and new labels.
+ Set childNodeN> to the index (from zero) of the node
+ that will downlink to the new lower-level inner tuple.
Set postfixHasPrefix> to indicate whether the new
lower-level inner tuple should have a prefix, and if so set
postfixPrefixDatum> to the prefix value. The
- combination of these two prefixes and the additional label must
- have the same meaning as the original prefix, because there is
- no opportunity to alter the node labels that are moved to the new
- lower-level tuple, nor to change any child index entries.
+ combination of these two prefixes and the downlink node's label
+ (if any) must have the same meaning as the original prefix, because
+ there is no opportunity to alter the node labels that are moved to
+ the new lower-level tuple, nor to change any child index entries.
After the node has been split, the choose
function will be called again with the replacement inner tuple.
- That call will usually result in an spgAddNode> result,
- since presumably the node label added in the split step will not
- match the new value; so after that, there will be a third call
- that finally returns spgMatchNode> and allows the
- insertion to descend to the leaf level.
+ That call may return an spgAddNode> result, if no suitable
+ node was created by the spgSplitTuple> action. Eventually
+ choose must return spgMatchNode> to
+ allow the insertion to descend to the next level.
@@ -492,9 +503,8 @@ typedef struct spgPickSplitOut
prefixDatum> to the prefix value.
Set nNodes> to indicate the number of nodes that
the new inner tuple will contain, and
- set nodeLabels> to an array of their label values.
- (If the nodes do not require labels, set nodeLabels>
- to NULL; see for details.)
+ set nodeLabels> to an array of their label values,
+ or to NULL if node labels are not required.
Set mapTuplesToNodes> to an array that gives the index
(from zero) of the node that each leaf tuple should be assigned to.
Set leafTupleDatums> to an array of the values to
@@ -561,7 +571,7 @@ typedef struct spgInnerConsistentIn
Datum reconstructedValue; /* value reconstructed at parent */
void *traversalValue; /* opclass-specific traverse value */
- MemoryContext traversalMemoryContext;
+ MemoryContext traversalMemoryContext; /* put new traverse values here */
int level; /* current level (counting from zero) */
bool returnData; /* original data must be returned? */
@@ -580,7 +590,6 @@ typedef struct spgInnerConsistentOut
int *levelAdds; /* increment level by this much for each */
Datum *reconstructedValues; /* associated reconstructed values */
void **traversalValues; /* opclass-specific traverse values */
-
} spgInnerConsistentOut;
@@ -599,6 +608,11 @@ typedef struct spgInnerConsistentOut
parent tuple; it is (Datum) 0> at the root level or if the
inner_consistent> function did not provide a value at the
parent level.
+ traversalValue> is a pointer to any traverse data
+ passed down from the previous call of inner_consistent>
+ on the parent index tuple, or NULL at the root level.
+ traversalMemoryContext> is the memory context in which
+ to store output traverse values (see below).
level> is the current inner tuple's level, starting at
zero for the root level.
returnData> is true> if reconstructed data is
@@ -615,9 +629,6 @@ typedef struct spgInnerConsistentOut
inner tuple, and
nodeLabels> is an array of their label values, or
NULL if the nodes do not have labels.
- traversalValue> is a pointer to data that
- inner_consistent> gets when called on child nodes from an
- outer call of inner_consistent> on parent nodes.
@@ -633,17 +644,19 @@ typedef struct spgInnerConsistentOut
reconstructedValues> to an array of the values
reconstructed for each child node to be visited; otherwise, leave
reconstructedValues> as NULL.
+ If it is desired to pass down additional out-of-band information
+ (traverse values>) to lower levels of the tree search,
+ set traversalValues> to an array of the appropriate
+ traverse values, one for each child node to be visited; otherwise,
+ leave traversalValues> as NULL.
Note that the inner_consistent> function is
responsible for palloc'ing the
- nodeNumbers>, levelAdds> and
- reconstructedValues> arrays.
- Sometimes accumulating some information is needed, while
- descending from parent to child node was happened. In this case
- traversalValues> array keeps pointers to
- specific data you need to accumulate for every child node.
- Memory for traversalValues> should be allocated in
- the default context, but each element of it should be allocated in
- traversalMemoryContext>.
+ nodeNumbers>, levelAdds>,
+ reconstructedValues>, and
+ traversalValues> arrays in the current memory context.
+ However, any output traverse values pointed to by
+ the traversalValues> array should be allocated
+ in traversalMemoryContext>.
@@ -670,8 +683,8 @@ typedef struct spgLeafConsistentIn
ScanKey scankeys; /* array of operators and comparison values */
int nkeys; /* length of array */
- void *traversalValue; /* opclass-specific traverse value */
Datum reconstructedValue; /* value reconstructed at parent */
+ void *traversalValue; /* opclass-specific traverse value */
int level; /* current level (counting from zero) */
bool returnData; /* original data must be returned? */
@@ -700,6 +713,9 @@ typedef struct spgLeafConsistentOut
parent tuple; it is (Datum) 0> at the root level or if the
inner_consistent> function did not provide a value at the
parent level.
+ traversalValue> is a pointer to any traverse data
+ passed down from the previous call of inner_consistent>
+ on the parent index tuple, or NULL at the root level.
level> is the current leaf tuple's level, starting at
zero for the root level.
returnData> is true> if reconstructed data is
@@ -797,7 +813,10 @@ typedef struct spgLeafConsistentOut
point. In such a case the code typically works with the nodes by
number, and there is no need for explicit node labels. To suppress
node labels (and thereby save some space), the picksplit>
- function can return NULL for the nodeLabels> array.
+ function can return NULL for the nodeLabels> array,
+ and likewise the choose> function can return NULL for
+ the prefixNodeLabels> array during
+ a spgSplitTuple> action.
This will in turn result in nodeLabels> being NULL during
subsequent calls to choose> and inner_consistent>.
In principle, node labels could be used for some inner tuples and omitted
@@ -807,10 +826,7 @@ typedef struct spgLeafConsistentOut
When working with an inner tuple having unlabeled nodes, it is an error
for choose> to return spgAddNode>, since the set
- of nodes is supposed to be fixed in such cases. Also, there is no
- provision for generating an unlabeled node in spgSplitTuple>
- actions, since it is expected that an spgAddNode> action will
- be needed as well.
+ of nodes is supposed to be fixed in such cases.
@@ -859,11 +875,10 @@ typedef struct spgLeafConsistentOut
The PostgreSQL source distribution includes
- several examples of index operator classes for
- SP-GiST. The core system currently provides radix
- trees over text columns and two types of trees over points: quad-tree and
- k-d tree. Look into src/backend/access/spgist/> to see the
- code.
+ several examples of index operator classes for SP-GiST,
+ as described in . Look
+ into src/backend/access/spgist/>
+ and src/backend/utils/adt/> to see the code.
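(Editor's aside: exercising two of the core SP-GiST opclasses named above; table and index names are hypothetical.)

CREATE TABLE pts (p point);
CREATE INDEX pts_spgist ON pts USING spgist (p);      -- quad-tree (default for point)
CREATE TABLE words (w text);
CREATE INDEX words_spgist ON words USING spgist (w);  -- radix tree over text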
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index f090ca528b..6fc04b224d 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -1705,17 +1705,40 @@ spgSplitNodeAction(Relation index, SpGistState *state,
/* Should not be applied to nulls */
Assert(!SpGistPageStoresNulls(current->page));
+ /* Check opclass gave us sane values */
+ if (out->result.splitTuple.prefixNNodes <= 0 ||
+ out->result.splitTuple.prefixNNodes > SGITMAXNNODES)
+ elog(ERROR, "invalid number of prefix nodes: %d",
+ out->result.splitTuple.prefixNNodes);
+ if (out->result.splitTuple.childNodeN < 0 ||
+ out->result.splitTuple.childNodeN >=
+ out->result.splitTuple.prefixNNodes)
+ elog(ERROR, "invalid child node number: %d",
+ out->result.splitTuple.childNodeN);
+
/*
- * Construct new prefix tuple, containing a single node with the specified
- * label. (We'll update the node's downlink to point to the new postfix
- * tuple, below.)
+ * Construct new prefix tuple with requested number of nodes. We'll fill
+ * in the childNodeN'th node's downlink below.
*/
- node = spgFormNodeTuple(state, out->result.splitTuple.nodeLabel, false);
+ nodes = (SpGistNodeTuple *) palloc(sizeof(SpGistNodeTuple) *
+ out->result.splitTuple.prefixNNodes);
+
+ for (i = 0; i < out->result.splitTuple.prefixNNodes; i++)
+ {
+ Datum label = (Datum) 0;
+ bool labelisnull;
+
+ labelisnull = (out->result.splitTuple.prefixNodeLabels == NULL);
+ if (!labelisnull)
+ label = out->result.splitTuple.prefixNodeLabels[i];
+ nodes[i] = spgFormNodeTuple(state, label, labelisnull);
+ }
prefixTuple = spgFormInnerTuple(state,
out->result.splitTuple.prefixHasPrefix,
out->result.splitTuple.prefixPrefixDatum,
- 1, &node);
+ out->result.splitTuple.prefixNNodes,
+ nodes);
/* it must fit in the space that innerTuple now occupies */
if (prefixTuple->size > innerTuple->size)
@@ -1807,10 +1830,12 @@ spgSplitNodeAction(Relation index, SpGistState *state,
* the postfix tuple first.) We have to update the local copy of the
* prefixTuple too, because that's what will be written to WAL.
*/
- spgUpdateNodeLink(prefixTuple, 0, postfixBlkno, postfixOffset);
+ spgUpdateNodeLink(prefixTuple, out->result.splitTuple.childNodeN,
+ postfixBlkno, postfixOffset);
prefixTuple = (SpGistInnerTuple) PageGetItem(current->page,
PageGetItemId(current->page, current->offnum));
- spgUpdateNodeLink(prefixTuple, 0, postfixBlkno, postfixOffset);
+ spgUpdateNodeLink(prefixTuple, out->result.splitTuple.childNodeN,
+ postfixBlkno, postfixOffset);
MarkBufferDirty(current->buffer);
diff --git a/src/backend/access/spgist/spgtextproc.c b/src/backend/access/spgist/spgtextproc.c
index e0d8f30ef1..852a9b00fa 100644
--- a/src/backend/access/spgist/spgtextproc.c
+++ b/src/backend/access/spgist/spgtextproc.c
@@ -212,9 +212,14 @@ spg_text_choose(PG_FUNCTION_ARGS)
out->result.splitTuple.prefixPrefixDatum =
formTextDatum(prefixStr, commonLen);
}
- out->result.splitTuple.nodeLabel =
+ out->result.splitTuple.prefixNNodes = 1;
+ out->result.splitTuple.prefixNodeLabels =
+ (Datum *) palloc(sizeof(Datum));
+ out->result.splitTuple.prefixNodeLabels[0] =
Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
+ out->result.splitTuple.childNodeN = 0;
+
if (prefixSize - commonLen == 1)
{
out->result.splitTuple.postfixHasPrefix = false;
@@ -280,7 +285,10 @@ spg_text_choose(PG_FUNCTION_ARGS)
out->resultType = spgSplitTuple;
out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
- out->result.splitTuple.nodeLabel = Int16GetDatum(-2);
+ out->result.splitTuple.prefixNNodes = 1;
+ out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
+ out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
+ out->result.splitTuple.childNodeN = 0;
out->result.splitTuple.postfixHasPrefix = false;
}
else
diff --git a/src/include/access/spgist.h b/src/include/access/spgist.h
index f39a2d6938..a953a5a401 100644
--- a/src/include/access/spgist.h
+++ b/src/include/access/spgist.h
@@ -90,10 +90,13 @@ typedef struct spgChooseOut
} addNode;
struct /* results for spgSplitTuple */
{
- /* Info to form new inner tuple with one node */
+ /* Info to form new upper-level inner tuple with one child tuple */
bool prefixHasPrefix; /* tuple should have a prefix? */
Datum prefixPrefixDatum; /* if so, its value */
- Datum nodeLabel; /* node's label */
+ int prefixNNodes; /* number of nodes */
+ Datum *prefixNodeLabels; /* their labels (or NULL for
+ * no labels) */
+ int childNodeN; /* which node gets child tuple */
/* Info to form new lower-level inner tuple with all old nodes */
bool postfixHasPrefix; /* tuple should have a prefix? */
@@ -134,7 +137,8 @@ typedef struct spgInnerConsistentIn
Datum reconstructedValue; /* value reconstructed at parent */
void *traversalValue; /* opclass-specific traverse value */
- MemoryContext traversalMemoryContext;
+ MemoryContext traversalMemoryContext; /* put new traverse values
+ * here */
int level; /* current level (counting from zero) */
bool returnData; /* original data must be returned? */
@@ -163,8 +167,8 @@ typedef struct spgLeafConsistentIn
ScanKey scankeys; /* array of operators and comparison values */
int nkeys; /* length of array */
- void *traversalValue; /* opclass-specific traverse value */
Datum reconstructedValue; /* value reconstructed at parent */
+ void *traversalValue; /* opclass-specific traverse value */
int level; /* current level (counting from zero) */
bool returnData; /* original data must be returned? */
--
cgit v1.2.3
From ff066481b0485b1a4e414de3abcaae0bda02b1e1 Mon Sep 17 00:00:00 2001
From: Bruce Momjian
Date: Tue, 23 Aug 2016 12:45:33 -0400
Subject: doc: fix incorrect 'literal' tags
Discussion: dcc4113d-1eda-4f60-d1c5-f50eee160bad@gmail.com
Author: Alexander Law
Backpatch-through: 9.6
---
doc/src/sgml/pgstandby.sgml | 2 +-
doc/src/sgml/ref/pg_xlogdump.sgml | 2 +-
doc/src/sgml/ref/pgbench.sgml | 2 +-
doc/src/sgml/ref/psql-ref.sgml | 4 ++--
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/pgstandby.sgml b/doc/src/sgml/pgstandby.sgml
index fb3f32eaaa..80c6f60062 100644
--- a/doc/src/sgml/pgstandby.sgml
+++ b/doc/src/sgml/pgstandby.sgml
@@ -363,7 +363,7 @@ recovery_end_command = 'del C:\pgsql.trigger.5442'
The copy> command on Windows sets the final file size
before the file is completely copied, which would ordinarily confuse
pg_standby. Therefore
- pg_standby waits sleeptime>
+ pg_standby waits sleeptime>
seconds once it sees the proper file size. GNUWin32's cp>
sets the file size only after the file copy is complete.
diff --git a/doc/src/sgml/ref/pg_xlogdump.sgml b/doc/src/sgml/ref/pg_xlogdump.sgml
index 296f1acc24..177caab00d 100644
--- a/doc/src/sgml/ref/pg_xlogdump.sgml
+++ b/doc/src/sgml/ref/pg_xlogdump.sgml
@@ -153,7 +153,7 @@ PostgreSQL documentation
Timeline from which to read log records. The default is to use the
- value in startseg>, if that is specified; otherwise, the
+ value in startseg>, if that is specified; otherwise, the
default is 1.
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index f58da351f9..285608d508 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -433,7 +433,7 @@ pgbench options> dbname>
sec>
- Show progress report every sec> seconds. The report
+ Show progress report every sec> seconds. The report
includes the time since the beginning of the run, the tps since the
last report, and the transaction latency average and standard
deviation since the last report. Under throttling (
[diff header lost in extraction; the following hunk belongs to a different file, likely the db_user_namespace material in doc/src/sgml/config.sgml]
- If this is on, you should create users as username@dbname>.
- When username> is passed by a connecting client,
+ If this is on, you should create users as username@dbname>.
+ When username> is passed by a connecting client,
@> and the database name are appended to the user
name and that database-specific user name is looked up by the
server. Note that when you create users with names containing
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 6355300d9d..5c1c4f69fb 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -13800,7 +13800,7 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab;
No
multiple continuous percentile: returns an array of results matching
- the shape of the fractions parameter, with each
+ the shape of the fractions parameter, with each
non-null element replaced by the value corresponding to that percentile
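(Editor's sketch of the array form being described; the input values are made up.)

SELECT percentile_cont(ARRAY[0.25, 0.5, 0.75])
       WITHIN GROUP (ORDER BY x)   -- result array matches the shape of 'fractions'
FROM (VALUES (1.0), (2.0), (3.0), (4.0)) AS t(x);
-- => {1.75, 2.5, 3.25}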
@@ -13845,7 +13845,7 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab;
No
multiple discrete percentile: returns an array of results matching the
- shape of the fractions parameter, with each non-null
+ shape of the fractions parameter, with each non-null
element replaced by the input value corresponding to that percentile
@@ -16850,7 +16850,7 @@ SELECT pg_type_is_visible('myschema.widget'::regtype);
pg_options_to_table returns the set of storage
option name/value pairs
- (option_name>/option_value>) when passed
+ (option_name>/option_value>) when passed
pg_class>.reloptions> or
pg_attribute>.attoptions>.
diff --git a/doc/src/sgml/ref/create_sequence.sgml b/doc/src/sgml/ref/create_sequence.sgml
index c9591462ee..62ae379226 100644
--- a/doc/src/sgml/ref/create_sequence.sgml
+++ b/doc/src/sgml/ref/create_sequence.sgml
@@ -349,7 +349,7 @@ END;
- The standard's AS <data type> expression is not
+ The standard's AS data_type> expression is not
supported.
diff --git a/doc/src/sgml/ref/set_role.sgml b/doc/src/sgml/ref/set_role.sgml
index aff3792199..a97ceabcff 100644
--- a/doc/src/sgml/ref/set_role.sgml
+++ b/doc/src/sgml/ref/set_role.sgml
@@ -127,7 +127,7 @@ SELECT SESSION_USER, CURRENT_USER;
PostgreSQL
- allows identifier syntax ("rolename"), while
+ allows identifier syntax ("rolename>"), while
the SQL standard requires the role name to be written as a string
literal. SQL does not allow this command during a transaction;
PostgreSQL does not make this
diff --git a/doc/src/sgml/ref/set_session_auth.sgml b/doc/src/sgml/ref/set_session_auth.sgml
index 4ac2128950..96d279aaf9 100644
--- a/doc/src/sgml/ref/set_session_auth.sgml
+++ b/doc/src/sgml/ref/set_session_auth.sgml
@@ -101,7 +101,7 @@ SELECT SESSION_USER, CURRENT_USER;
The SQL standard allows some other expressions to appear in place
of the literal user_name, but these options
are not important in practice. PostgreSQL
- allows identifier syntax ("username"), which SQL
+ allows identifier syntax ("username>"), which SQL
does not. SQL does not allow this command during a transaction;
PostgreSQL does not make this
restriction because there is no reason to.
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index be5974a4ff..5a70d7db80 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -3622,10 +3622,10 @@ SELECT plainto_tsquery('supernovae stars');
- The optional parameter PATTERN can be the name of
+ The optional parameter PATTERN can be the name of
a text search object, optionally schema-qualified. If
- PATTERN is omitted then information about all
- visible objects will be displayed. PATTERN can be a
+ PATTERN is omitted then information about all
+ visible objects will be displayed. PATTERN can be a
regular expression and can provide separate patterns
for the schema and object names. The following examples illustrate this:
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index d8d2e9e490..de6a466efc 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -204,8 +204,8 @@ SELECT clean_emp();
If an argument is of a composite type, then the dot notation,
- e.g., argname.fieldname or
- $1.fieldname, can be used to access attributes of the
+ e.g., argname>.fieldname> or
+ $1.fieldname>, can be used to access attributes of the
argument. Again, you might need to qualify the argument's name with the
function name to make the form with an argument name unambiguous.
@@ -527,7 +527,8 @@ LINE 1: SELECT new_emp().name;
Another option is to use
functional notation for extracting an attribute. The simple way
to explain this is that we can use the
- notations attribute(table)> and table.attribute>
+ notations attribute>(table>)>
+ and table>.attribute>>
interchangeably.
@@ -1305,12 +1306,15 @@ CREATE FUNCTION test(smallint, double precision) RETURNS ...
A function that takes a single argument of a composite type should
generally not have the same name as any attribute (field) of that type.
- Recall that attribute(table) is considered equivalent
- to table.attribute. In the case that there is an
+ Recall that attribute>(table>)
+ is considered equivalent
+ to table>.attribute>.
+ In the case that there is an
ambiguity between a function on a composite type and an attribute of
the composite type, the attribute will always be used. It is possible
to override that choice by schema-qualifying the function name
- (that is, schema.func(table)) but it's better to
+ (that is, schema>.func>(table>)
+ ) but it's better to
avoid the problem by not choosing conflicting names.
@@ -2818,7 +2822,7 @@ HeapTuple heap_form_tuple(TupleDesc tupdesc, Datum *values, bool *isnull)
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
to build a HeapTuple> given user data
- in C string form. values is an array of C strings,
+ in C string form. values is an array of C strings,
one for each attribute of the return row. Each C string should be in
the form expected by the input function of the attribute data
type. In order to return a null value for one of the attributes,
--
cgit v1.2.3
From 26fa446da64716f12ab3a623434c644fcb344b2e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 26 Aug 2016 16:20:17 -0400
Subject: Add a nonlocalized version of the severity field to client error
messages.
This has been requested a few times, but the use case for it was never
entirely clear. The reason for adding it now is that transmission of
error reports from parallel workers fails when NLS is active, because
pq_parse_errornotice() wrongly assumes that the existing severity field
is nonlocalized. There are other ways we could have fixed that, but the
other options were basically kluges, whereas this way provides something
that's at least arguably a useful feature along with the bug fix.
Per report from Jakob Egger. Back-patch into 9.6, because otherwise
parallel query is essentially unusable in non-English locales. The
problem exists in 9.5 as well, but we don't want to risk changing
on-the-wire behavior in 9.5 (even though the possibility of new error
fields is specifically called out in the protocol document). It may
be sufficient to leave the issue unfixed in 9.5, given the very limited
usefulness of pq_parse_errornotice in that version.
Discussion:
---
doc/src/sgml/libpq.sgml | 16 ++++++++++++++++
doc/src/sgml/protocol.sgml | 19 +++++++++++++++++++
src/backend/libpq/pqmq.c | 26 +++++++++++++++++++++-----
src/backend/utils/error/elog.c | 33 ++++++++++++++++++++-------------
src/include/postgres_ext.h | 1 +
src/interfaces/libpq/fe-exec.c | 1 +
6 files changed, 78 insertions(+), 18 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index f22e3da047..2f9350b10e 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -2767,6 +2767,22 @@ char *PQresultErrorField(const PGresult *res, int fieldcode);
+
+ PG_DIAG_SEVERITY_NONLOCALIZED>
+
+
+ The severity; the field contents are ERROR>,
+ FATAL>, or PANIC> (in an error message),
+ or WARNING>, NOTICE>, DEBUG>,
+ INFO>, or LOG> (in a notice message).
+ This is identical to the PG_DIAG_SEVERITY> field except
+ that the contents are never localized. This is present only in
+ reports generated by PostgreSQL> versions 9.6
+ and later.
+
+
+
+
PG_DIAG_SQLSTATE>
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 9c96d8fc44..68b0941029 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -4882,6 +4882,25 @@ message.
+
+
+V>
+
+
+
+ Severity: the field contents are
+ ERROR>, FATAL>, or
+ PANIC> (in an error message), or
+ WARNING>, NOTICE>, DEBUG>,
+ INFO>, or LOG> (in a notice message).
+ This is identical to the S> field except
+ that the contents are never localized. This is present only in
+ messages generated by PostgreSQL> versions 9.6
+ and later.
+
+
+
+
C>
diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c
index 921242fbc4..bfe66c6c44 100644
--- a/src/backend/libpq/pqmq.c
+++ b/src/backend/libpq/pqmq.c
@@ -237,10 +237,26 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
switch (code)
{
case PG_DIAG_SEVERITY:
+ /* ignore, trusting we'll get a nonlocalized version */
+ break;
+ case PG_DIAG_SEVERITY_NONLOCALIZED:
if (strcmp(value, "DEBUG") == 0)
- edata->elevel = DEBUG1; /* or some other DEBUG level */
+ {
+ /*
+ * We can't reconstruct the exact DEBUG level, but
+ * presumably it was >= client_min_messages, so select
+ * DEBUG1 to ensure we'll pass it on to the client.
+ */
+ edata->elevel = DEBUG1;
+ }
else if (strcmp(value, "LOG") == 0)
- edata->elevel = LOG; /* can't be COMMERROR */
+ {
+ /*
+ * It can't be LOG_SERVER_ONLY, or the worker wouldn't
+ * have sent it to us; so LOG is the correct value.
+ */
+ edata->elevel = LOG;
+ }
else if (strcmp(value, "INFO") == 0)
edata->elevel = INFO;
else if (strcmp(value, "NOTICE") == 0)
@@ -254,11 +270,11 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
else if (strcmp(value, "PANIC") == 0)
edata->elevel = PANIC;
else
- elog(ERROR, "unknown error severity");
+ elog(ERROR, "unrecognized error severity: \"%s\"", value);
break;
case PG_DIAG_SQLSTATE:
if (strlen(value) != 5)
- elog(ERROR, "malformed sql state");
+ elog(ERROR, "invalid SQLSTATE: \"%s\"", value);
edata->sqlerrcode = MAKE_SQLSTATE(value[0], value[1], value[2],
value[3], value[4]);
break;
@@ -308,7 +324,7 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
edata->funcname = pstrdup(value);
break;
default:
- elog(ERROR, "unknown error field: %d", (int) code);
+ elog(ERROR, "unrecognized error field code: %d", (int) code);
break;
}
}
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 03c4a39761..224ee7801c 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -2753,7 +2753,7 @@ write_csvlog(ErrorData *edata)
appendStringInfoChar(&buf, ',');
/* Error severity */
- appendStringInfoString(&buf, error_severity(edata->elevel));
+ appendStringInfoString(&buf, _(error_severity(edata->elevel)));
appendStringInfoChar(&buf, ',');
/* SQL state code */
@@ -2870,7 +2870,7 @@ send_message_to_server_log(ErrorData *edata)
formatted_log_time[0] = '\0';
log_line_prefix(&buf, edata);
- appendStringInfo(&buf, "%s: ", error_severity(edata->elevel));
+ appendStringInfo(&buf, "%s: ", _(error_severity(edata->elevel)));
if (Log_error_verbosity >= PGERROR_VERBOSE)
appendStringInfo(&buf, "%s: ", unpack_sql_state(edata->sqlerrcode));
@@ -3153,12 +3153,16 @@ send_message_to_frontend(ErrorData *edata)
if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
{
/* New style with separate fields */
+ const char *sev;
char tbuf[12];
int ssval;
int i;
+ sev = error_severity(edata->elevel);
pq_sendbyte(&msgbuf, PG_DIAG_SEVERITY);
- err_sendstring(&msgbuf, error_severity(edata->elevel));
+ err_sendstring(&msgbuf, _(sev));
+ pq_sendbyte(&msgbuf, PG_DIAG_SEVERITY_NONLOCALIZED);
+ err_sendstring(&msgbuf, sev);
/* unpack MAKE_SQLSTATE code */
ssval = edata->sqlerrcode;
@@ -3277,7 +3281,7 @@ send_message_to_frontend(ErrorData *edata)
initStringInfo(&buf);
- appendStringInfo(&buf, "%s: ", error_severity(edata->elevel));
+ appendStringInfo(&buf, "%s: ", _(error_severity(edata->elevel)));
if (edata->show_funcname && edata->funcname)
appendStringInfo(&buf, "%s: ", edata->funcname);
@@ -3587,7 +3591,10 @@ get_errno_symbol(int errnum)
/*
- * error_severity --- get localized string representing elevel
+ * error_severity --- get string representing elevel
+ *
+ * The string is not localized here, but we mark the strings for translation
+ * so that callers can invoke _() on the result.
*/
static const char *
error_severity(int elevel)
@@ -3601,29 +3608,29 @@ error_severity(int elevel)
case DEBUG3:
case DEBUG4:
case DEBUG5:
- prefix = _("DEBUG");
+ prefix = gettext_noop("DEBUG");
break;
case LOG:
case LOG_SERVER_ONLY:
- prefix = _("LOG");
+ prefix = gettext_noop("LOG");
break;
case INFO:
- prefix = _("INFO");
+ prefix = gettext_noop("INFO");
break;
case NOTICE:
- prefix = _("NOTICE");
+ prefix = gettext_noop("NOTICE");
break;
case WARNING:
- prefix = _("WARNING");
+ prefix = gettext_noop("WARNING");
break;
case ERROR:
- prefix = _("ERROR");
+ prefix = gettext_noop("ERROR");
break;
case FATAL:
- prefix = _("FATAL");
+ prefix = gettext_noop("FATAL");
break;
case PANIC:
- prefix = _("PANIC");
+ prefix = gettext_noop("PANIC");
break;
default:
prefix = "???";
diff --git a/src/include/postgres_ext.h b/src/include/postgres_ext.h
index 74c344c704..ae2f087798 100644
--- a/src/include/postgres_ext.h
+++ b/src/include/postgres_ext.h
@@ -49,6 +49,7 @@ typedef PG_INT64_TYPE pg_int64;
* applications.
*/
#define PG_DIAG_SEVERITY 'S'
+#define PG_DIAG_SEVERITY_NONLOCALIZED 'V'
#define PG_DIAG_SQLSTATE 'C'
#define PG_DIAG_MESSAGE_PRIMARY 'M'
#define PG_DIAG_MESSAGE_DETAIL 'D'
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index d1b91c841c..a9ba54628f 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -824,6 +824,7 @@ pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...)
*/
pqSaveMessageField(res, PG_DIAG_MESSAGE_PRIMARY, msgBuf);
pqSaveMessageField(res, PG_DIAG_SEVERITY, libpq_gettext("NOTICE"));
+ pqSaveMessageField(res, PG_DIAG_SEVERITY_NONLOCALIZED, "NOTICE");
/* XXX should provide a SQLSTATE too? */
/*
--
cgit v1.2.3
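A minimal client-side sketch (not part of the patch itself): with the new
field, a libpq application can prefer the nonlocalized severity string and
fall back to the localized one when talking to a pre-9.6 server. The
PG_DIAG_* field codes come from postgres_ext.h, which libpq-fe.h already
includes.

    #include <libpq-fe.h>

    static const char *
    result_severity(const PGresult *res)
    {
        /* 'V' field: sent only by 9.6-and-later servers, never localized */
        char   *sev = PQresultErrorField(res, PG_DIAG_SEVERITY_NONLOCALIZED);

        /* older server: fall back to the localized 'S' field */
        if (sev == NULL)
            sev = PQresultErrorField(res, PG_DIAG_SEVERITY);
        return sev ? sev : "???";
    }

Because the 'V' field's contents are never translated, a test such as
strcmp(result_severity(res), "FATAL") == 0 now behaves the same regardless
of the server's lc_messages setting.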
From a6f0dc701b2f84839761783e87c49d43cd3e31df Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 28 Aug 2016 12:37:23 -0400
Subject: Update 9.6 release notes through today.
---
doc/src/sgml/config.sgml | 24 ++++++---------
doc/src/sgml/release-9.6.sgml | 71 +++++++++++++++++++++++++++++++++++++++----
2 files changed, 75 insertions(+), 20 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4db9c81e56..7c483c6ef3 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5388,9 +5388,10 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
Process Title
- These settings control how the process title as seen
- by ps is modified. See
- for details.
+ These settings control how process titles of server processes are
+ modified. Process titles are typically viewed using programs like
+ ps> or, on Windows, Process Explorer>.
+ See for details.
@@ -5403,18 +5404,14 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
Sets the cluster name that appears in the process title for all
- processes in this cluster. The name can be any string of less than
- NAMEDATALEN> characters (64 characters in a standard
+ server processes in this cluster. The name can be any string of less
+ than NAMEDATALEN> characters (64 characters in a standard
build). Only printable ASCII characters may be used in the
cluster_name value. Other characters will be
replaced with question marks (?). No name is shown
if this parameter is set to the empty string ''> (which is
the default). This parameter can only be set at server start.
-
- The process title is typically viewed using programs like
- ps> or, on Windows, Process Explorer>.
-
@@ -5427,11 +5424,10 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
Enables updating of the process title every time a new SQL command
- is received by the server. The process title is typically viewed
- by the ps> command,
- or in Windows by using the Process Explorer>.
- This value defaults to off on Windows platforms due to the
- platform's significant overhead for updating the process title.
+ is received by the server.
+ This setting defaults to on> on most platforms, but it
+ defaults to off> on Windows due to that platform's larger
+ overhead for updating the process title.
Only superusers can change this setting.
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 8d7356e27f..6ec7583485 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -7,7 +7,7 @@
Release Date2016-??-??
- Current as of 2016-08-08 (commit 34927b292)
+ Current as of 2016-08-27 (commit b9fe6cbc8)
@@ -348,6 +348,7 @@ This commit is also listed under libpq and psql
2016-04-27 [59eb55127] Fix EXPLAIN VERBOSE output for parallel aggregate.
2016-06-09 [c9ce4a1c6] Eliminate "parallel degree" terminology.
2016-06-16 [75be66464] Invent min_parallel_relation_size GUC to replace a hard-
+2016-08-16 [f85b1a841] Disable parallel query by default.
-->
Parallel queries (Robert Haas, Amit Kapila, David Rowley,
@@ -365,9 +366,11 @@ This commit is also listed under libpq and psql
- Use of parallel query execution can be controlled
- through the new configuration parameters ,
+ Parallel query execution is not (yet) enabled by default.
+ To allow it, set the new configuration
+ parameter to a
+ value larger than zero. Additional control over use of parallelism
+ is available through other new configuration parameters
,
, , and
+
+
+
+ Disable by default on
+ Windows (Takayuki Tsunakawa)
+
+
+
+ The overhead of updating the process title is much larger on Windows
+ than most other platforms, and it is also less useful to do it since
+ most Windows users do not have tools that can display process titles.
+
+
+
@@ -2431,6 +2450,22 @@ XXX this is pending backpatch, may need to remove
+
+ Add a nonlocalized version of the severity field in error and notice
+ messages (Tom Lane)
+
+
+
+ This change allows client code to determine severity of an error or
+ notice without having to worry about localized variants of the
+ severity strings.
+
+
+
+
+
@@ -2957,6 +2992,25 @@ This commit is also listed under libpq and PL/pgSQL
+
+ Add macros to make AllocSetContextCreate()> calls simpler
+ and safer (Tom Lane)
+
+
+
+ Writing out the individual sizing parameters for a memory context
+ is now deprecated in favor of using one of the new
+ macros ALLOCSET_DEFAULT_SIZES>,
+ ALLOCSET_SMALL_SIZES>,
+ or ALLOCSET_START_SMALL_SIZES>.
+ Existing code continues to work, however.
+
+
+
+
+
@@ -3038,19 +3092,24 @@ This commit is also listed under libpq and PL/pgSQL
Restructure index access method API> to hide most of
- it at the C> level (Alexander Korotkov)
+ it at the C> level (Alexander Korotkov, Andrew Gierth)
This change modernizes the index AM API> to look more
like the designs we have adopted for foreign data wrappers and
tablesample handlers. This simplifies the C> code
- and should make it more feasible to define index access methods in
+ and makes it much more practical to define index access methods in
installable extensions. A consequence is that most of the columns
of the pg_am> system catalog have disappeared.
+ New inspection
+ functions have been added to allow SQL queries to determine
+ index AM properties that used to be discoverable
+ from pg_am>.
--
cgit v1.2.3
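Among the release-note items above, the new AllocSetContextCreate() sizing
macros are easy to show concretely. A hedged sketch of backend or extension
C code, assuming the usual server environment; "my working data" is just an
illustrative context name:

    #include "postgres.h"
    #include "utils/memutils.h"

    static MemoryContext
    make_work_context(void)
    {
        /*
         * New style: ALLOCSET_DEFAULT_SIZES expands to the three classic
         * sizing parameters (minimum context size, initial block size,
         * maximum block size). Spelling the three parameters out
         * individually still works but is now deprecated.
         */
        return AllocSetContextCreate(CurrentMemoryContext,
                                     "my working data",
                                     ALLOCSET_DEFAULT_SIZES);
    }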
From 39d866433e6fb1c385eee8dc67843097b8703add Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 28 Aug 2016 17:40:06 -0400
Subject: Make another editorial pass over the 9.6 release notes.
I think they're pretty much release-quality now.
---
doc/src/sgml/func.sgml | 28 +--
doc/src/sgml/release-9.6.sgml | 447 +++++++++++++++++++++++-------------------
doc/src/sgml/spgist.sgml | 1 +
3 files changed, 264 insertions(+), 212 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5c1c4f69fb..5148095fb3 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18580,7 +18580,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_replication_origin_drop(node_nametext)
- void
+ void>
Delete a previously created replication origin, including any
@@ -18612,7 +18612,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_replication_origin_session_setup(node_nametext)
- void
+ void>
Mark the current session as replaying from the given
@@ -18630,7 +18630,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_replication_origin_session_reset()
- void
+ void>
Cancel the effects
@@ -18679,7 +18679,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_replication_origin_xact_setup(origin_lsnpg_lsn, origin_timestamptimestamptz)
- void
+ void>
Mark the current transaction as replaying a transaction that has
@@ -18698,7 +18698,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_replication_origin_xact_reset()
- void
+ void>
Cancel the effects of
@@ -18714,7 +18714,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_replication_origin_advance(node_nametext, pospg_lsn)
- void
+ void>
Set replication progress for the given node to the given
@@ -19174,7 +19174,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
- brin_summarize_new_values(index_oid> regclass>)
+ brin_summarize_new_values(index> regclass>)integersummarize page ranges not already summarized
@@ -19191,8 +19191,8 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
- brin_summarize_new_values> receives a BRIN index OID as
- argument and inspects the index to find page ranges in the base table
+ brin_summarize_new_values> accepts the OID or name of a
+ BRIN index and inspects the index to find page ranges in the base table
that are not currently summarized by the index; for any such range
it creates a new summary index tuple by scanning the table pages.
It returns the number of new page range summaries that were inserted
@@ -19201,12 +19201,12 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
gin_clean_pending_list> accepts the OID or name of
- a GIN index and cleans up the pending list of the specified GIN index
+ a GIN index and cleans up the pending list of the specified index
by moving entries in it to the main GIN data structure in bulk.
- It returns the number of pages cleaned up from the pending list.
- Note that if the argument is a GIN index built with fastupdate>
- option disabled, the cleanup does not happen and the return value is 0
- because the index doesn't have a pending list.
+ It returns the number of pages removed from the pending list.
+ Note that if the argument is a GIN index built with
+ the fastupdate> option disabled, no cleanup happens and the
+ return value is 0, because the index doesn't have a pending list.
Please see and
for details of the pending list and fastupdate> option.
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 6ec7583485..895d88e768 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -23,33 +23,33 @@
- Parallel sequential scans, joins and aggregates
+ Parallel execution of sequential scans, joins and aggregates
- Eliminate repetitive scanning of old data by autovacuum
+ Autovacuum no longer performs repetitive scanning of old data
- Synchronous replication now allows multiple standby servers, for
+ Synchronous replication now allows multiple standby servers for
increased reliability
- Allow full-text search for phrases (multiple adjacent words)
+ Full-text search can now search for phrases (multiple adjacent words)
- Support foreign/remote joins, sorts, and UPDATE>s in
- postgres_fdw>
+ postgres_fdw> now supports remote joins, sorts,
+ UPDATE>s, and DELETE>s
@@ -87,25 +87,6 @@
-
- Change the column name in the
- information_schema>.routines>
- view from result_cast_character_set_name>
- to result_cast_char_set_name> (Clément
- Prévost)
-
-
-
- The SQL:2011 standard specifies the longer name, but that appears
- to be a mistake, because adjacent column names use the shorter
- style, as do other information_schema> views.
-
-
-
-
-
@@ -117,7 +98,7 @@
Historically a process has only been shown as waiting if it was
- waiting for a heavy weight lock. Now waits for light weight locks
+ waiting for a heavyweight lock. Now waits for lightweight locks
and buffer pins are also shown in pg_stat_activity>.
Also, the type of lock being waited for is now visible.
These changes replace the waiting> column with
@@ -149,18 +130,18 @@
Make extract()>> behave
- more reasonably with infinite> inputs (Vitaly Burovoy)
+ more reasonably with infinite inputs (Vitaly Burovoy)
Historically the extract()> function just returned
zero given an infinite timestamp, regardless of the given
- unit name. Make it return infinity
+ field name. Make it return infinity
or -infinity as appropriate when the
requested field is one that is monotonically increasing (e.g,
year>, epoch>), or NULL> when
it is not (e.g., day>, hour>). Also,
- throw the expected error for bad unit names.
+ throw the expected error for bad field names.
@@ -186,8 +167,8 @@ This commit is also listed under libpq and psql
2016-03-29 [61d66c44f] Fix support of digits in email/hostnames.
-->
- Fix text search parser to allow leading digits in email>
- and host> tokens (Artur Zakirov)
+ Fix the default text search parser to allow leading digits
+ in email> and host> tokens (Artur Zakirov)
@@ -205,18 +186,18 @@ This commit is also listed under libpq and psql
2016-03-16 [9a206d063] Improve script generating unaccent rules
-->
- Extend contrib/unaccent>'s standard
- unaccent.rules> file to handle all diacritics
- known to Unicode, and expand ligatures correctly (Thomas Munro,
+ Extend contrib/unaccent>>'s
+ standard unaccent.rules> file to handle all diacritics
+ known to Unicode, and to expand ligatures correctly (Thomas Munro,
Léonard Benedetti)
- The previous version omitted some less-common letters with
- diacritic marks. It now also expands ligatures into separate
- letters. Installations that use this rules file may wish to
- rebuild tsvector> columns and indexes that depend on
- the result.
+ The previous version neglected to convert some less-common letters
+ with diacritic marks. Also, ligatures are now expanded into
+ separate letters. Installations that use this rules file may wish
+ to rebuild tsvector> columns and indexes that depend on the
+ result.
@@ -258,17 +239,38 @@ This commit is also listed under libpq and psql
+
+ Change a column name in the
+ information_schema>.routines>
+ view from result_cast_character_set_name>
+ to result_cast_char_set_name> (Clément
+ Prévost)
+
+
+
+ The SQL:2011 standard specifies the longer name, but that appears
+ to be a mistake, because adjacent column names use the shorter
+ style, as do other information_schema> views.
+
+
+
+
+
- Support multiple and
- command-line options (Pavel Stehule, Catalin Iacob)
+ psql>'s option no longer implies
+
+ (Pavel Stehule, Catalin Iacob)
- To allow this with sane behavior, one backwards incompatibility
- had to be introduced: no longer implies
- .
+ Write (or its
+ abbreviation ) explicitly to obtain the old
+ behavior. Scripts modified this way will still work with old
+ versions of psql>.
@@ -277,7 +279,7 @@ This commit is also listed under libpq and psql
2015-07-02 [5671aaca8] Improve pg_restore's -t switch to match all types of rel
-->
- Improve pg_restore>'s switch to
+ Improve pg_restore>'s option to
match all types of relations, not only plain tables (Craig Ringer)
@@ -287,7 +289,7 @@ This commit is also listed under libpq and psql
2016-02-12 [59a884e98] Change delimiter used for display of NextXID
-->
- Change the display format of NextXID> in
+ Change the display format used for NextXID> in
pg_controldata> and related places (Joe Conway,
Bruce Momjian)
@@ -383,8 +385,8 @@ This commit is also listed under libpq and psql
2015-09-16 [7aea8e4f2] Determine whether it's safe to attempt a parallel plan f
-->
- Provide infrastructure for marking the parallel-safe status of
- functions (Robert Haas, Amit Kapila)
+ Provide infrastructure for marking the parallel-safety status of
+ functions (Robert Haas, Amit Kapila)
@@ -432,8 +434,12 @@ This commit is also listed under libpq and psql
Add gin_clean_pending_list()>>
function to allow manual invocation of pending-list cleanup for a
- GIN index, separately from vacuuming or analyzing the parent table
- (Jeff Janes)
+ GIN index (Jeff Janes)
+
+
+
+ Formerly, such cleanup happened only as a byproduct of vacuuming or
+ analyzing the parent table.
@@ -711,7 +717,7 @@ This commit is also listed under libpq and psql
If necessary, vacuum can be forced to process all-frozen
pages using the new DISABLE_PAGE_SKIPPING> option.
- Normally, this should never be needed but it might help in
+ Normally this should never be needed, but it might help in
recovering from visibility-map corruption.
@@ -774,7 +780,7 @@ This commit is also listed under libpq and psql
2016-02-11 [d4c3a156c] Remove GROUP BY columns that are functionally dependent
-->
- Avoid computing GROUP BY> columns if they are
+ Ignore GROUP BY> columns that are
functionally dependent on other columns (David Rowley)
@@ -788,43 +794,21 @@ This commit is also listed under libpq and psql
-
- When appropriate, postpone evaluation of SELECT>
- output expressions until after an ORDER BY> sort
- (Konstantin Knizhnik)
-
-
-
- This change ensures that volatile or expensive functions in the
- output list are executed in the order suggested by ORDER
- BY>, and that they are not evaluated more times than required
- when there is a LIMIT> clause. Previously, these
- properties held if the ordering was performed by an index scan or
- pre-merge-join sort, but not if it was performed by a top-level
- sort.
-
-
-
-
-
Allow use of an index-only
scan on a partial index when the index's WHERE>
- clause references columns which are not indexed (Tomas Vondra,
+ clause references columns that are not indexed (Tomas Vondra,
Kyotaro Horiguchi)
- For example, CREATE INDEX tidx_partial ON t(b) WHERE a
- > 0> could not previously be used for an index-only scan by a
- query that only referenced a> in its WHERE>
- clause because a> is not an indexed value like
- b> is.
+ For example, an index defined by CREATE INDEX tidx_partial
+ ON t(b) WHERE a > 0> can now be used for an index-only scan by
+ a query that specifies WHERE a > 0> and does not
+ otherwise use a>. Previously this was disallowed
+ because a> is not listed as an index column.
@@ -875,11 +859,12 @@ This commit is also listed under libpq and psql
On Linux, sync_file_range()> is used for this purpose,
- and the feature is on by default on Linux because that function has few
- downsides. This sync capability is also available on other platforms
- that have msync()> or posix_fadvise()>,
- but those interfaces have some undesirable side-effects so the
- feature is disabled by default on non-Linux platforms.
+ and the feature is on by default on Linux because that function has
+ few downsides. This flushing capability is also available on other
+ platforms if they have msync()>
+ or posix_fadvise()>, but those interfaces have some
+ undesirable side-effects so the feature is disabled by default on
+ non-Linux platforms.
@@ -902,8 +887,8 @@ This commit is also listed under libpq and psql
- For example, SELECT AVG(x), SUM(x) FROM x> can use a
- single per-row compuation for both aggregates.
+ For example, SELECT AVG(x), VARIANCE(x) FROM tab> can use
+ a single per-row computation for both aggregates.
@@ -913,9 +898,9 @@ This commit is also listed under libpq and psql
-->
Speed up visibility tests for recently-created tuples by checking
- our transaction snapshot, not pg_clog>, to decide
- if the source transaction should be considered committed (Jeff
- Janes, Tom Lane)
+ the current transaction's snapshot, not pg_clog>, to
+ decide if the source transaction should be considered committed
+ (Jeff Janes, Tom Lane)
@@ -940,10 +925,11 @@ This commit is also listed under libpq and psql
Two-phase commit information is now written only to WAL>
- during PREPARE TRANSACTION>, and read from
- WAL> during COMMIT PREPARED>. A separate
- state file is created only if the pending transaction does not
- get committed or aborted by the time of the next checkpoint.
+ during PREPARE TRANSACTION>, and will be read back from
+ WAL> during COMMIT PREPARED> if that happens
+ soon thereafter. A separate state file is created only if the
+ pending transaction does not get committed or aborted by the time
+ of the next checkpoint.
@@ -1142,9 +1128,9 @@ This commit is also listed under libpq and psql
This function returns an array of the process IDs of any
sessions that are blocking the session with the given process ID.
Historically users have obtained such information using a self-join
- on the pg_locks> view. However, it is unreasonably
+ on the pg_locks> view. However, it is unreasonably
tedious to do it that way with any modicum of correctness, and
- the addition of parallel queries has made the approach entirely
+ the addition of parallel queries has made the old approach entirely
impractical, since locks might be held or awaited by child worker
processes rather than the session's main process.
@@ -1181,7 +1167,7 @@ This commit is also listed under libpq and psql
- The memory usage dump output to the postmaster log during an
+ The memory usage dump that is output to the postmaster log during an
out-of-memory failure now summarizes statistics when there are a
large number of memory contexts, rather than possibly generating
a very large report. There is also a grand total>
@@ -1203,7 +1189,8 @@ This commit is also listed under libpq and psql
2016-04-08 [34c33a1f0] Add BSD authentication method.
-->
- Add an bsd> authentication method to allow the use of
+ Add a bsd> authentication
+ method to allow use of
the BSD> Authentication service for
PostgreSQL> client authentication (Marisa Emerson)
@@ -1219,9 +1206,10 @@ This commit is also listed under libpq and psql
2016-04-08 [2f1d2b7a7] Set PAM_RHOST item for PAM authentication
-->
- When using PAM> authentication, provide the client
- IP address or host name to PAM> modules via the
- PAM_RHOST> item (Grzegorz Sampolski)
+ When using PAM>
+ authentication, provide the client IP address or host name
+ to PAM> modules via the PAM_RHOST> item
+ (Grzegorz Sampolski)
@@ -1230,8 +1218,8 @@ This commit is also listed under libpq and psql
2016-01-07 [5e0b5dcab] Provide more detail in postmaster log for password authe
-->
- Provide detail in the postmaster log during more password
- authentication failures (Tom Lane)
+ Provide detail in the postmaster log for more types of password
+ authentication failure (Tom Lane)
@@ -1245,8 +1233,8 @@ This commit is also listed under libpq and psql
2015-09-06 [643beffe8] Support RADIUS passwords up to 128 characters
-->
- Support RADIUS> passwords up to 128 characters long
- (Marko Tiikkaja)
+ Support RADIUS> passwords>
+ up to 128 characters long (Marko Tiikkaja)
@@ -1255,7 +1243,8 @@ This commit is also listed under libpq and psql
2016-04-08 [35e2e357c] Add authentication parameters compat_realm and upn_usena
-->
- Add new SSPI> authentication parameters
+ Add new SSPI>
+ authentication parameters
compat_realm> and upn_username> to control
whether NetBIOS> or Kerberos>
realm names and user names are used during SSPI>
@@ -1302,30 +1291,13 @@ This commit is also listed under libpq and psql
-
- Add configure option
-
-
- This allows the use of systemd> service units of
- type notify>, which greatly simplifies the management
- of PostgreSQL> under systemd>.
-
-
-
-
-
- Add log_line_prefix> option %n> to print
- the current time as a Unix epoch, with milliseconds (Tomas Vondra,
- Jeff Davis)
+ Add option %n> to
+ print the current time in Unix epoch form, with milliseconds (Tomas
+ Vondra, Jeff Davis)
@@ -1362,6 +1334,23 @@ This commit is also listed under libpq and psql
+
+ Add configure option
+
+
+ This allows the use of systemd> service units of
+ type notify>, which greatly simplifies the management
+ of PostgreSQL> under systemd>.
+
+
+
+
+
@@ -1407,8 +1396,8 @@ This commit is also listed under libpq and psql
but that is unsafe and inefficient. It also prevents a new
postmaster from being started until the last old backend has
exited. Backends will detect postmaster death when waiting for
- client I/O, so the exit will not be instantaneous, but it in most
- cases should happen no later than the end of the current query.
+ client I/O, so the exit will not be instantaneous, but it should
+ happen no later than the end of the current query.
@@ -1541,7 +1530,8 @@ XXX this is pending backpatch, may need to remove
-->
Add a option to
- pg_basebackup> (Peter Eisentraut)
+ pg_basebackup>>
+ (Peter Eisentraut)
@@ -1612,6 +1602,28 @@ XXX this is pending backpatch, may need to remove
+
+ When appropriate, postpone evaluation of SELECT>
+ output expressions until after an ORDER BY> sort
+ (Konstantin Knizhnik)
+
+
+
+ This change ensures that volatile or expensive functions in the
+ output list are executed in the order suggested by ORDER
+ BY>, and that they are not evaluated more times than required
+ when there is a LIMIT> clause. Previously, these
+ properties held if the ordering was performed by an index scan or
+ pre-merge-join sort, but not if it was performed by a top-level
+ sort.
+
+
+
+
+
@@ -1663,8 +1675,8 @@ XXX this is pending backpatch, may need to remove
Previously, the foreign join pushdown infrastructure left the
question of security entirely up to individual foreign data
- wrappers, but it would be too easy for an FDW> to
- inadvertently open up subtle security hole. So, make it the core
+ wrappers, but that made it too easy for an FDW> to
+ inadvertently create subtle security holes. So, make it the core
code's job to determine which role ID will access each table,
and do not attempt join pushdown unless the role is the same for
all relevant relations.
@@ -1707,7 +1719,7 @@ XXX this is pending backpatch, may need to remove
This command allows a database object to be marked as depending
- on an extension, so that it will be automatically dropped if
+ on an extension, so that it will be dropped automatically if
the extension is dropped (without needing CASCADE>).
However, the object is not part of the extension, and thus will
be dumped separately by pg_dump>.
@@ -1777,8 +1789,8 @@ XXX this is pending backpatch, may need to remove
-->
Add a CASCADE> option to CREATE
- EXTENSION to automatically create extensions it depends
- on (Petr Jelínek)
+ EXTENSION to automatically create any extensions the
+ requested one depends on (Petr Jelínek)
@@ -1803,7 +1815,7 @@ XXX this is pending backpatch, may need to remove
- This is possible because the table has no existing rows. This matches
+ This is safe because the table has no existing rows. This matches
the longstanding behavior of FOREIGN KEY> constraints.
@@ -1918,10 +1930,10 @@ XXX this is pending backpatch, may need to remove
2016-06-27 [6734a1cac] Change predecence of phrase operator.
-->
- Improve full-text search to support searching for phrases, that
- is, lexemes appearing adjacent to each other in a specific order,
- or with a specified distance between them (Teodor Sigaev, Oleg
- Bartunov, Dmitry Ivanov)
+ Improve full-text search> to support
+ searching for phrases, that is, lexemes appearing adjacent to each
+ other in a specific order, or with a specified distance between
+ them (Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov)
@@ -2001,9 +2013,10 @@ XXX this is pending backpatch, may need to remove
2016-03-17 [f4ceed6ce] Improve support of Hunspell
-->
- Upgrade the ispell> dictionary to handle modern
- Hunspell> files and support more languages
- (Artur Zakirov)
+ Upgrade
+ the ispell>>
+ dictionary type to handle modern Hunspell> files and
+ support more languages (Artur Zakirov)
@@ -2012,7 +2025,9 @@ XXX this is pending backpatch, may need to remove
2015-10-30 [12c9a0400] Implement lookbehind constraints in our regular-expressi
-->
- Implement look-behind constraints in regular expressions (Tom Lane)
+ Implement look-behind constraints
+ in regular expressions>
+ (Tom Lane)
@@ -2044,7 +2059,7 @@ XXX this is pending backpatch, may need to remove
2015-11-07 [c5e86ea93] Add "xid <> xid" and "xid <> int4" operators.
-->
- Add transaction id operators xid> <>>
+ Add transaction ID operators xid> <>>
xid> and xid> <>> int4>,
for consistency with the corresponding equality operators
(Michael Paquier)
@@ -2090,8 +2105,10 @@ XXX this is pending backpatch, may need to remove
2016-01-05 [abb173392] Add scale(numeric)
-->
- Add a scale(numeric)> function to extract the display
- scale of a numeric> value (Marko Tiikkaja)
+ Add a scale(numeric)>>
+ function to extract the display scale of a numeric> value
+ (Marko Tiikkaja)
@@ -2109,7 +2126,7 @@ XXX this is pending backpatch, may need to remove
measures its argument in degrees, whereas sin()>
measures in radians. These functions go to some lengths to
deliver exact results for values where an exact result can be
- expected, e.g. sind(30) = 0.5.
+ expected, for instance sind(30) = 0.5.
@@ -2125,9 +2142,9 @@ XXX this is pending backpatch, may need to remove
The POSIX> standard says that these functions should
- return NaN> for NaN input, and should throw an error for
- out-of-range inputs including infinity>. Previously our
- behavior varied across platforms.
+ return NaN> for NaN> input, and should throw
+ an error for out-of-range inputs including infinity>.
+ Previously our behavior varied across platforms.
@@ -2136,9 +2153,10 @@ XXX this is pending backpatch, may need to remove
2016-03-29 [e511d878f] Allow to_timestamp(float8) to convert float infinity to
-->
- Make to_timestamp(float8)> convert float
- infinity> to timestamp infinity> (Vitaly
- Burovoy)
+ Make to_timestamp(float8)>>
+ convert float infinity> to
+ timestamp infinity> (Vitaly Burovoy)
@@ -2170,11 +2188,12 @@ XXX this is pending backpatch, may need to remove
2015-09-17 [9acb9007d] Fix oversight in tsearch type check
-->
- Allow ts_stat_sql()> and
- tsvector_update_trigger()> to operate on values that
- are of types binary-compatible with the expected argument type,
- not just that argument type; for example allow citext>
- where text> is expected (Teodor Sigaev)
+ Allow ts_stat()>>
+ and tsvector_update_trigger()>>
+ to operate on values that are of types binary-compatible with the
+ expected argument type, not only that argument type; for example
+ allow citext> where text> is expected (Teodor
+ Sigaev)
@@ -2216,8 +2235,9 @@ XXX this is pending backpatch, may need to remove
In to_number()>>,
- interpret V> as dividing by 10 to the power of the
- number of digits following V> (Bruce Momjian)
+ interpret a V> format code as dividing by 10 to the
+ power of the number of digits following V> (Bruce
+ Momjian)
@@ -2231,8 +2251,10 @@ XXX this is pending backpatch, may need to remove
2016-01-05 [ea0d494da] Make the to_reg*() functions accept text not cstring.
-->
- Make the to_reg*()> functions accept type text>
- not cstring> (Petr Korobeinikov)
+ Make the to_reg*()>>
+ functions accept type text> not cstring>
+ (Petr Korobeinikov)
@@ -2289,7 +2311,7 @@ XXX this is pending backpatch, may need to remove
This allows avoiding an error for an unrecognized parameter
- name, and instead return a NULL>.
+ name, instead returning a NULL>.
@@ -2348,7 +2370,7 @@ XXX this is pending backpatch, may need to remove
In PL/pgSQL, detect mismatched
CONTINUE> and EXIT> statements while
- compiling PL/pgSQL functions, rather than at execution time
+ compiling a function, rather than at execution time
(Jim Nasby)
@@ -2453,8 +2475,9 @@ XXX this is pending backpatch, may need to remove
2016-08-26 [e796d0aba] Add a nonlocalized version of the severity field to clie
-->
- Add a nonlocalized version of the severity field in error and notice
- messages (Tom Lane)
+ Add a nonlocalized version of
+ the severity field> in
+ error and notice messages (Tom Lane)
@@ -2495,6 +2518,8 @@ This commit is also listed under psql and PL/pgSQL
+ This is done with the new function PQresultVerboseErrorMessage()>>.
This supports psql>'s new \errverbose>
feature, and may be useful for other clients as well.
@@ -2541,8 +2566,10 @@ This commit is also listed under psql and PL/pgSQL
2015-09-14 [d02426029] Check existency of table/schema for -t/-n option (pg_dum
-->
- Add a
@@ -2594,6 +2621,22 @@ This commit is also listed under psql and PL/pgSQL
+
+ Support multiple
-c
and
-f
+ command-line options (Pavel Stehule, Catalin Iacob)
+
+
+
+ The specified operations are carried out in the order in which the
+ options are given, and then psql> terminates.
+
+
+
+
+
@@ -2679,7 +2722,7 @@ This commit is also listed under psql and PL/pgSQL
2016-06-15 [9901d8ac2] Use strftime("%c") to format timestamps in psql's \watch
-->
- Improve the headers output of the \watch> command
+ Improve the headers output by the \watch> command
(Michael Paquier, Tom Lane)
@@ -2790,8 +2833,9 @@ This commit is also listed under libpq and PL/pgSQL
This change allows SQL commands in scripts to span multiple lines.
Existing custom scripts will need to be modified to add a semicolon
- at the end of each line if missing. (Doing so does not break
- the script for use with older versions of pgbench>.)
+ at the end of each line that does not have one already. (Doing so
+ does not break the script for use with older versions
+ of pgbench>.)
@@ -2960,8 +3004,9 @@ This commit is also listed under libpq and PL/pgSQL
2015-12-01 [e50cda784] Use pg_rewind when target timeline was switched
-->
- Improve pg_rewind> so that it can work when the
- target timeline changes (Alexander Korotkov)
+ Improve pg_rewind>>
+ so that it can work when the target timeline changes (Alexander
+ Korotkov)
@@ -3095,8 +3140,9 @@ This commit is also listed under libpq and PL/pgSQL
2016-08-13 [ed0097e4f] Add SQL-accessible functions for inspecting index AM pro
-->
- Restructure index access method API> to hide most of
- it at the C> level (Alexander Korotkov, Andrew Gierth)
+ Restructure index access
+ method API>> to hide most of it at
+ the C> level (Alexander Korotkov, Andrew Gierth)
@@ -3118,9 +3164,11 @@ This commit is also listed under libpq and PL/pgSQL
2016-04-06 [6c268df12] Add new catalog called pg_init_privs
-->
- Add pg_init_privs> system catalog to hold original
- privileges of initdb>-created and extension-created
- objects (Stephen Frost)
+ Add pg_init_privs>>
+ system catalog to hold original privileges
+ of initdb>-created and extension-created objects
+ (Stephen Frost)
@@ -3315,7 +3363,7 @@ This commit is also listed under libpq and PL/pgSQL
This is somewhat like the reconstructed value>, but it
- could be any arbitrary chunk of data, it need not be of the same
+ could be any arbitrary chunk of data, not necessarily of the same
data type as the indexed column.
@@ -3367,9 +3415,10 @@ This commit is also listed under libpq and PL/pgSQL
2016-03-13 [7a8d87483] Rename auto_explain.sample_ratio to sample_rate
-->
- Add configuration parameter auto_explain.sample_rate>
- to allow contrib/auto_explain> to capture just a
- configurable fraction of all queries (Craig Ringer, Julien Rouhaud)
+ Add configuration parameter auto_explain.sample_rate> to
+ allow contrib/auto_explain>>
+ to capture just a configurable fraction of all queries (Craig
+ Ringer, Julien Rouhaud)
@@ -3383,9 +3432,9 @@ This commit is also listed under libpq and PL/pgSQL
2016-04-01 [9ee014fc8] Bloom index contrib module
-->
- Add contrib/bloom> module that implements an index
- access method based on Bloom filtering (Teodor Sigaev, Alexander
- Korotkov)
+ Add contrib/bloom>> module that
+ implements an index access method based on Bloom filtering (Teodor
+ Sigaev, Alexander Korotkov)
@@ -3401,9 +3450,9 @@ This commit is also listed under libpq and PL/pgSQL
2015-12-28 [81ee726d8] Code and docs review for cube kNN support.
-->
- In contrib/cube>, introduce distance operators for
- cubes, and support kNN-style searches in GiST indexes on cube
- columns (Stas Kelvich)
+ In contrib/cube>>, introduce
+ distance operators for cubes, and support kNN-style searches in
+ GiST indexes on cube columns (Stas Kelvich)
@@ -3434,8 +3483,9 @@ This commit is also listed under libpq and PL/pgSQL
-->
Add selectivity estimation functions for
- contrib/intarray> operators to improve plans for
- queries using those operators (Yury Zhuravlev, Alexander Korotkov)
+ contrib/intarray>> operators
+ to improve plans for queries using those operators (Yury Zhuravlev,
+ Alexander Korotkov)
@@ -3470,7 +3520,8 @@ This commit is also listed under libpq and PL/pgSQL
-->
Add support for word similarity> to
- contrib/pg_trgm> (Alexander Korotkov, Artur Zakirov)
+ contrib/pg_trgm>>
+ (Alexander Korotkov, Artur Zakirov)
@@ -3486,9 +3537,8 @@ This commit is also listed under libpq and PL/pgSQL
-->
Add configuration parameter
- pg_trgm.similarity_threshold> for contrib/pg_trgm>>'s similarity
- threshold (Artur Zakirov)
+ pg_trgm.similarity_threshold> for
+ contrib/pg_trgm>'s similarity threshold (Artur Zakirov)
@@ -3525,8 +3575,9 @@ This commit is also listed under libpq and PL/pgSQL
2016-06-17 [71d05a2c7] pg_visibility: Add pg_truncate_visibility_map function.
-->
- Add contrib/pg_visibility> module to allow examining
- table visibility maps (Robert Haas)
+ Add contrib/pg_visibility>> module
+ to allow examining table visibility maps (Robert Haas)
@@ -3545,7 +3596,7 @@ This commit is also listed under libpq and PL/pgSQL
- postgres_fdw>
+ postgres_fdw>>
@@ -3597,7 +3648,7 @@ This commit is also listed under libpq and PL/pgSQL
- Formerly, this involved sending a SELECT FOR UPDATE>
+ Formerly, remote updates involved sending a SELECT FOR UPDATE>
command and then updating or deleting the selected rows one-by-one.
While that is still necessary if the operation requires any local
processing, it can now be done remotely if all elements of the
diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml
index d60aa23f33..cd4a8d07c4 100644
--- a/doc/src/sgml/spgist.sgml
+++ b/doc/src/sgml/spgist.sgml
@@ -674,6 +674,7 @@ typedef struct spgInnerConsistentOut
However, any output traverse values pointed to by
the traversalValues> array should be allocated
in traversalMemoryContext>.
+ Each traverse value must be a single palloc'd chunk.
--
cgit v1.2.3
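The spgist.sgml change above ("a single palloc'd chunk") deserves a concrete
illustration. A fragment of a hypothetical SP-GiST inner_consistent support
function, assuming in and out point to the spgInnerConsistentIn/Out structs
and i indexes the output nodes; MyTraverseValue is an invented
opclass-specific struct:

    typedef struct MyTraverseValue
    {
        int     level;
        double  bound;
    } MyTraverseValue;

    /*
     * Each output traverse value must be one palloc'd chunk, allocated in
     * the memory context the core code supplies, not in the short-lived
     * context the support function is called in.
     */
    MemoryContext oldcxt = MemoryContextSwitchTo(in->traversalMemoryContext);
    MyTraverseValue *tv = (MyTraverseValue *) palloc(sizeof(MyTraverseValue));

    tv->level = in->level + 1;
    tv->bound = 0.0;            /* whatever the opclass tracks */
    out->traversalValues[i] = tv;
    MemoryContextSwitchTo(oldcxt);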
From 9b7cd59af1afcfbd786921d5cf73befb5fefa2f7 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 29 Aug 2016 20:16:02 +0300
Subject: Remove support for OpenSSL versions older than 0.9.8.
OpenSSL officially supports only 1.0.1 and newer. Some OS distributions
still provide patches for 0.9.8, but anything older than that is not
interesting anymore. Let's simplify things by removing compatibility code.
Andreas Karlsson, with small changes by me.
---
contrib/pgcrypto/openssl.c | 152 +------------------------------
doc/src/sgml/installation.sgml | 39 +++-----
doc/src/sgml/libpq.sgml | 3 +-
doc/src/sgml/pgcrypto.sgml | 18 +---
src/backend/libpq/be-secure-openssl.c | 8 +-
src/interfaces/libpq/fe-secure-openssl.c | 4 -
src/interfaces/libpq/libpq-int.h | 2 +-
7 files changed, 20 insertions(+), 206 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/pgcrypto/openssl.c b/contrib/pgcrypto/openssl.c
index 976af70591..ffab5d2bb0 100644
--- a/contrib/pgcrypto/openssl.c
+++ b/contrib/pgcrypto/openssl.c
@@ -37,6 +37,7 @@
#include
#include
#include
+#include
#include
#include
@@ -46,155 +47,6 @@
#define MAX_KEY (512/8)
#define MAX_IV (128/8)
-/*
- * Compatibility with OpenSSL 0.9.6
- *
- * It needs AES and newer DES and digest API.
- */
-#if OPENSSL_VERSION_NUMBER >= 0x00907000L
-
-/*
- * Nothing needed for OpenSSL 0.9.7+
- */
-
-#include
-#else /* old OPENSSL */
-
-/*
- * Emulate OpenSSL AES.
- */
-
-#include "rijndael.c"
-
-#define AES_ENCRYPT 1
-#define AES_DECRYPT 0
-#define AES_KEY rijndael_ctx
-
-static int
-AES_set_encrypt_key(const uint8 *key, int kbits, AES_KEY *ctx)
-{
- aes_set_key(ctx, key, kbits, 1);
- return 0;
-}
-
-static int
-AES_set_decrypt_key(const uint8 *key, int kbits, AES_KEY *ctx)
-{
- aes_set_key(ctx, key, kbits, 0);
- return 0;
-}
-
-static void
-AES_ecb_encrypt(const uint8 *src, uint8 *dst, AES_KEY *ctx, int enc)
-{
- memcpy(dst, src, 16);
- if (enc)
- aes_ecb_encrypt(ctx, dst, 16);
- else
- aes_ecb_decrypt(ctx, dst, 16);
-}
-
-static void
-AES_cbc_encrypt(const uint8 *src, uint8 *dst, int len, AES_KEY *ctx, uint8 *iv, int enc)
-{
- memcpy(dst, src, len);
- if (enc)
- {
- aes_cbc_encrypt(ctx, iv, dst, len);
- memcpy(iv, dst + len - 16, 16);
- }
- else
- {
- aes_cbc_decrypt(ctx, iv, dst, len);
- memcpy(iv, src + len - 16, 16);
- }
-}
-
-/*
- * Emulate DES_* API
- */
-
-#define DES_key_schedule des_key_schedule
-#define DES_cblock des_cblock
-#define DES_set_key(k, ks) \
- des_set_key((k), *(ks))
-#define DES_ecb_encrypt(i, o, k, e) \
- des_ecb_encrypt((i), (o), *(k), (e))
-#define DES_ncbc_encrypt(i, o, l, k, iv, e) \
- des_ncbc_encrypt((i), (o), (l), *(k), (iv), (e))
-#define DES_ecb3_encrypt(i, o, k1, k2, k3, e) \
- des_ecb3_encrypt((des_cblock *)(i), (des_cblock *)(o), \
- *(k1), *(k2), *(k3), (e))
-#define DES_ede3_cbc_encrypt(i, o, l, k1, k2, k3, iv, e) \
- des_ede3_cbc_encrypt((i), (o), \
- (l), *(k1), *(k2), *(k3), (iv), (e))
-
-/*
- * Emulate newer digest API.
- */
-
-static void
-EVP_MD_CTX_init(EVP_MD_CTX *ctx)
-{
- memset(ctx, 0, sizeof(*ctx));
-}
-
-static int
-EVP_MD_CTX_cleanup(EVP_MD_CTX *ctx)
-{
- px_memset(ctx, 0, sizeof(*ctx));
- return 1;
-}
-
-static int
-EVP_DigestInit_ex(EVP_MD_CTX *ctx, const EVP_MD *md, void *engine)
-{
- EVP_DigestInit(ctx, md);
- return 1;
-}
-
-static int
-EVP_DigestFinal_ex(EVP_MD_CTX *ctx, unsigned char *res, unsigned int *len)
-{
- EVP_DigestFinal(ctx, res, len);
- return 1;
-}
-#endif /* old OpenSSL */
-
-/*
- * Provide SHA2 for older OpenSSL < 0.9.8
- */
-#if OPENSSL_VERSION_NUMBER < 0x00908000L
-
-#include "sha2.c"
-#include "internal-sha2.c"
-
-typedef void (*init_f) (PX_MD *md);
-
-static int
-compat_find_digest(const char *name, PX_MD **res)
-{
- init_f init = NULL;
-
- if (pg_strcasecmp(name, "sha224") == 0)
- init = init_sha224;
- else if (pg_strcasecmp(name, "sha256") == 0)
- init = init_sha256;
- else if (pg_strcasecmp(name, "sha384") == 0)
- init = init_sha384;
- else if (pg_strcasecmp(name, "sha512") == 0)
- init = init_sha512;
- else
- return PXE_NO_HASH;
-
- *res = px_alloc(sizeof(PX_MD));
- init(*res);
- return 0;
-}
-#else
-#define compat_find_digest(name, res) (PXE_NO_HASH)
-#endif
-
/*
* Hashes
*/
@@ -275,7 +127,7 @@ px_find_digest(const char *name, PX_MD **res)
md = EVP_get_digestbyname(name);
if (md == NULL)
- return compat_find_digest(name, res);
+ return PXE_NO_HASH;
digest = px_alloc(sizeof(*digest));
digest->algo = md;
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index a9968756e6..14a6d57aea 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -252,10 +252,17 @@ su - postgres
- You need Kerberos>, OpenSSL>,
- OpenLDAP>, and/or
- PAM>, if you want to support authentication or
- encryption using those services.
+ You need OpenSSL>, if you want to support
+ encrypted client connections. The minimum required version is
+ 0.9.8.
+
+
+
+
+
+ You need Kerberos>, OpenLDAP>,
+ and/or PAM>, if you want to support authentication
+ using those services.
@@ -2826,30 +2833,6 @@ MANPATH=/usr/lib/scohelp/%L/man:/usr/dt/man:/usr/man:/usr/share/man:scohelp:/usr
-
- Problems with OpenSSL
-
-
- When you build PostgreSQL with OpenSSL support you might get
- compilation errors in the following files:
-
- src/backend/libpq/crypt.c
- src/backend/libpq/password.c
- src/interfaces/libpq/fe-auth.c
- src/interfaces/libpq/fe-connect.c
-
-
- This is because of a namespace conflict between the standard
- /usr/include/crypt.h header and the header
- files provided by OpenSSL.
-
-
-
- Upgrading your OpenSSL installation to version 0.9.6a fixes this
- problem. Solaris 9 and above has a newer version of OpenSSL.
-
-
-
configure Complains About a Failed Test Program
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 2f9350b10e..4e34f00e44 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1238,8 +1238,7 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
If set to 1 (default), data sent over SSL connections will be
- compressed (this requires OpenSSL> version
- 0.9.8 or later).
+ compressed.
If set to 0, compression will be disabled (this requires
OpenSSL> 1.0.0 or later).
This parameter is ignored if a connection without SSL is made,
diff --git a/doc/src/sgml/pgcrypto.sgml b/doc/src/sgml/pgcrypto.sgml
index c4cefde4f7..bf514aacf3 100644
--- a/doc/src/sgml/pgcrypto.sgml
+++ b/doc/src/sgml/pgcrypto.sgml
@@ -1184,12 +1184,12 @@ gen_random_uuid() returns uuid
SHA224/256/384/512yes
- yes (Note 1)
+ yesOther digest algorithmsno
- yes (Note 2)
+ yes (Note 1)Blowfish
@@ -1199,7 +1199,7 @@ gen_random_uuid() returns uuid
AESyes
- yes (Note 3)
+ yesDES/3DES/CAST5
@@ -1230,12 +1230,6 @@ gen_random_uuid() returns uuid
-
-
- SHA2 algorithms were added to OpenSSL in version 0.9.8. For
- older versions, pgcrypto> will use built-in code.
-
-
Any digest algorithm OpenSSL supports is automatically picked up.
@@ -1243,12 +1237,6 @@ gen_random_uuid() returns uuid
explicitly.
-
-
- AES is included in OpenSSL since version 0.9.7. For
- older versions, pgcrypto> will use built-in code.
-
-
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index f6adb155c6..e5f434ca17 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -53,10 +53,8 @@
#include <openssl/ssl.h>
#include <openssl/dh.h>
-#if SSLEAY_VERSION_NUMBER >= 0x0907000L
#include <openssl/conf.h>
-#endif
-#if (OPENSSL_VERSION_NUMBER >= 0x0090800fL) && !defined(OPENSSL_NO_ECDH)
+#ifndef OPENSSL_NO_ECDH
#include <openssl/ec.h>
#endif
@@ -166,9 +164,7 @@ be_tls_init(void)
if (!SSL_context)
{
-#if SSLEAY_VERSION_NUMBER >= 0x0907000L
OPENSSL_config(NULL);
-#endif
SSL_library_init();
SSL_load_error_strings();
@@ -978,7 +974,7 @@ info_cb(const SSL *ssl, int type, int args)
static void
initialize_ecdh(void)
{
-#if (OPENSSL_VERSION_NUMBER >= 0x0090800fL) && !defined(OPENSSL_NO_ECDH)
+#ifndef OPENSSL_NO_ECDH
EC_KEY *ecdh;
int nid;
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index f6ce1c7a13..d8716128ec 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -54,9 +54,7 @@
#endif
#include <openssl/ssl.h>
-#if (SSLEAY_VERSION_NUMBER >= 0x00907000L)
#include <openssl/conf.h>
-#endif
#ifdef USE_SSL_ENGINE
#include <openssl/engine.h>
#endif
@@ -848,9 +846,7 @@ pgtls_init(PGconn *conn)
{
if (pq_init_ssl_lib)
{
-#if SSLEAY_VERSION_NUMBER >= 0x00907000L
OPENSSL_config(NULL);
-#endif
SSL_library_init();
SSL_load_error_strings();
}
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 1183323a44..a94ead04ff 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -77,7 +77,7 @@ typedef struct
#include <openssl/ssl.h>
#include <openssl/err.h>
-#if (SSLEAY_VERSION_NUMBER >= 0x00907000L) && !defined(OPENSSL_NO_ENGINE)
+#ifndef OPENSSL_NO_ENGINE
#define USE_SSL_ENGINE
#endif
#endif /* USE_OPENSSL */
--
cgit v1.2.3
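With the compatibility shims removed, pgcrypto resolves every digest through
OpenSSL's EVP layer and simply fails for names OpenSSL does not know. A
minimal SQL sketch of the user-visible behavior (assuming the pgcrypto
extension is available; error text abbreviated):

CREATE EXTENSION IF NOT EXISTS pgcrypto;
SELECT encode(digest('hello', 'sha256'), 'hex');  -- SHA-2 now always comes from OpenSSL
SELECT digest('hello', 'no-such-hash');           -- ERROR: no built-in fallback remains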
From 39b691f251167bbb3d49203abfb39d430f68f411 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 2 Sep 2016 17:29:31 -0400
Subject: Don't require dynamic timezone abbreviations to match underlying time
zone.
Previously, we threw an error if a dynamic timezone abbreviation did not
match any abbreviation recorded in the referenced IANA time zone entry.
That seemed like a good consistency check at the time, but it turns out
that a number of the abbreviations in the IANA database are things that
Olson and crew made up out of whole cloth. Their current policy is to
remove such names in favor of using simple numeric offsets. Perhaps
unsurprisingly, a lot of these made-up abbreviations have varied in meaning
over time, which meant that our commit b2cbced9e and later changes made
them into dynamic abbreviations. So with newer IANA database versions
that don't mention these abbreviations at all, we fail, as reported in bug
#14307 from Neil Anderson. It's worse than just a few unused-in-the-wild
abbreviations not working, because the pg_timezone_abbrevs view stops
working altogether (since its underlying function tries to compute the
whole view result in one call).
We considered deleting these abbreviations from our abbreviations list, but
the problem with that is that we can't stay ahead of possible future IANA
changes. Instead, let's leave the abbreviations list alone, and treat any
"orphaned" dynamic abbreviation as just meaning the referenced time zone.
It will behave a bit differently than it used to, in that you can no
longer override the zone's standard vs. daylight rule by using the "wrong"
abbreviation of a pair, but that's better than failing entirely. (Also,
this solution can be interpreted as adding a small new feature, which is
that any abbreviation a user wants can be defined as referencing a time
zone name.)
Back-patch to all supported branches, since this problem affects all
of them when using tzdata 2016f or newer.
Report: <20160902031551.15674.67337@wrigleys.postgresql.org>
Discussion: <6189.1472820913@sss.pgh.pa.us>
---
doc/src/sgml/catalogs.sgml | 7 +++
doc/src/sgml/datetime.sgml | 41 +++++++++++----
src/backend/utils/adt/datetime.c | 85 +++++++++++++++++++++++--------
src/test/regress/expected/timestamptz.out | 20 ++++++++
src/test/regress/sql/timestamptz.sql | 11 ++++
5 files changed, 132 insertions(+), 32 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 4e09e06aed..322d8d6dc7 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9811,6 +9811,13 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
+
+ While most timezone abbreviations represent fixed offsets from UTC,
+ there are some that have historically varied in value
+ (see for more information).
+ In such cases this view presents their current meaning.
+
+
diff --git a/doc/src/sgml/datetime.sgml b/doc/src/sgml/datetime.sgml
index ffd0715128..ef9139f9e3 100644
--- a/doc/src/sgml/datetime.sgml
+++ b/doc/src/sgml/datetime.sgml
@@ -384,19 +384,38 @@
A zone_abbreviation is just the abbreviation
- being defined. The offset is the equivalent
- offset in seconds from UTC, positive being east from Greenwich and
- negative being west. For example, -18000 would be five hours west
- of Greenwich, or North American east coast standard time. D>
- indicates that the zone name represents local daylight-savings time rather
- than standard time. Alternatively, a time_zone_name> can
- be given, in which case that time zone definition is consulted, and the
- abbreviation's meaning in that zone is used. This alternative is
- recommended only for abbreviations whose meaning has historically varied,
- as looking up the meaning is noticeably more expensive than just using
- a fixed integer value.
+ being defined. An offset is an integer giving
+ the equivalent offset in seconds from UTC, positive being east from
+ Greenwich and negative being west. For example, -18000 would be five
+ hours west of Greenwich, or North American east coast standard time.
+ D> indicates that the zone name represents local
+ daylight-savings time rather than standard time.
+
+ Alternatively, a time_zone_name> can be given, referencing
+ a zone name defined in the IANA timezone database. The zone's definition
+ is consulted to see whether the abbreviation is or has been in use in
+ that zone, and if so, the appropriate meaning is used — that is,
+ the meaning that was in use at the timestamp whose value is
+ being determined, or the meaning in use immediately before that if it
+ wasn't current at that time, or the oldest meaning if it was used only
+ after that time. This behavior is essential for dealing with
+ abbreviations whose meaning has historically varied. It is also allowed
+ to define an abbreviation in terms of a zone name in which that
+ abbreviation does not appear; then using the abbreviation is just
+ equivalent to writing out the zone name.
+
+
+
+
+ Using a simple integer offset is preferred
+ when defining an abbreviation whose offset from UTC has never changed,
+ as such abbreviations are much cheaper to process than those that
+ require consulting a time zone definition.
+
+
+
The @INCLUDE> syntax allows inclusion of another file in the
.../share/timezonesets/> directory. Inclusion can be nested,
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 965c3b4ff0..45ba7cd906 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -56,8 +56,9 @@ static void AdjustFractDays(double frac, struct pg_tm * tm, fsec_t *fsec,
int scale);
static int DetermineTimeZoneOffsetInternal(struct pg_tm * tm, pg_tz *tzp,
pg_time_t *tp);
-static int DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr,
- pg_tz *tzp, int *isdst);
+static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t,
+ const char *abbr, pg_tz *tzp,
+ int *offset, int *isdst);
static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp);
@@ -1689,19 +1690,40 @@ overflow:
* This differs from the behavior of DetermineTimeZoneOffset() in that a
* standard-time or daylight-time abbreviation forces use of the corresponding
* GMT offset even when the zone was then in DS or standard time respectively.
+ * (However, that happens only if we can match the given abbreviation to some
+ * abbreviation that appears in the IANA timezone data. Otherwise, we fall
+ * back to doing DetermineTimeZoneOffset().)
*/
int
DetermineTimeZoneAbbrevOffset(struct pg_tm * tm, const char *abbr, pg_tz *tzp)
{
pg_time_t t;
+ int zone_offset;
+ int abbr_offset;
+ int abbr_isdst;
/*
* Compute the UTC time we want to probe at. (In event of overflow, we'll
* probe at the epoch, which is a bit random but probably doesn't matter.)
*/
- (void) DetermineTimeZoneOffsetInternal(tm, tzp, &t);
+ zone_offset = DetermineTimeZoneOffsetInternal(tm, tzp, &t);
- return DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp, &tm->tm_isdst);
+ /*
+ * Try to match the abbreviation to something in the zone definition.
+ */
+ if (DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp,
+ &abbr_offset, &abbr_isdst))
+ {
+ /* Success, so use the abbrev-specific answers. */
+ tm->tm_isdst = abbr_isdst;
+ return abbr_offset;
+ }
+
+ /*
+ * No match, so use the answers we already got from
+ * DetermineTimeZoneOffsetInternal.
+ */
+ return zone_offset;
}
@@ -1715,19 +1737,41 @@ DetermineTimeZoneAbbrevOffsetTS(TimestampTz ts, const char *abbr,
pg_tz *tzp, int *isdst)
{
pg_time_t t = timestamptz_to_time_t(ts);
+ int zone_offset;
+ int abbr_offset;
+ int tz;
+ struct pg_tm tm;
+ fsec_t fsec;
- return DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp, isdst);
+ /*
+ * If the abbrev matches anything in the zone data, this is pretty easy.
+ */
+ if (DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp,
+ &abbr_offset, isdst))
+ return abbr_offset;
+
+ /*
+ * Else, break down the timestamp so we can use DetermineTimeZoneOffset.
+ */
+ if (timestamp2tm(ts, &tz, &tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ zone_offset = DetermineTimeZoneOffset(&tm, tzp);
+ *isdst = tm.tm_isdst;
+ return zone_offset;
}
/* DetermineTimeZoneAbbrevOffsetInternal()
*
* Workhorse for above two functions: work from a pg_time_t probe instant.
- * DST status is returned into *isdst.
+ * On success, return GMT offset and DST status into *offset and *isdst.
*/
-static int
-DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr,
- pg_tz *tzp, int *isdst)
+static bool
+DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp,
+ int *offset, int *isdst)
{
char upabbr[TZ_STRLEN_MAX + 1];
unsigned char *p;
@@ -1739,18 +1783,17 @@ DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr,
*p = pg_toupper(*p);
/* Look up the abbrev's meaning at this time in this zone */
- if (!pg_interpret_timezone_abbrev(upabbr,
- &t,
- &gmtoff,
- isdst,
- tzp))
- ereport(ERROR,
- (errcode(ERRCODE_CONFIG_FILE_ERROR),
- errmsg("time zone abbreviation \"%s\" is not used in time zone \"%s\"",
- abbr, pg_get_timezone_name(tzp))));
-
- /* Change sign to agree with DetermineTimeZoneOffset() */
- return (int) -gmtoff;
+ if (pg_interpret_timezone_abbrev(upabbr,
+ &t,
+ &gmtoff,
+ isdst,
+ tzp))
+ {
+ /* Change sign to agree with DetermineTimeZoneOffset() */
+ *offset = (int) -gmtoff;
+ return true;
+ }
+ return false;
}
diff --git a/src/test/regress/expected/timestamptz.out b/src/test/regress/expected/timestamptz.out
index 67f26db204..2bfc13ad72 100644
--- a/src/test/regress/expected/timestamptz.out
+++ b/src/test/regress/expected/timestamptz.out
@@ -2603,3 +2603,23 @@ SELECT '2007-12-09 07:30:00 UTC'::timestamptz AT TIME ZONE 'VET';
Sun Dec 09 03:00:00 2007
(1 row)
+--
+-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
+-- more-or-less working. We can't test their contents in any great detail
+-- without the outputs changing anytime IANA updates the underlying data,
+-- but it seems reasonable to expect at least one entry per major meridian.
+-- (At the time of writing, the actual counts are around 38 because of
+-- zones using fractional GMT offsets, so this is a pretty loose test.)
+--
+select count(distinct utc_offset) >= 24 as ok from pg_timezone_names;
+ ok
+----
+ t
+(1 row)
+
+select count(distinct utc_offset) >= 24 as ok from pg_timezone_abbrevs;
+ ok
+----
+ t
+(1 row)
+
diff --git a/src/test/regress/sql/timestamptz.sql b/src/test/regress/sql/timestamptz.sql
index c023095bb8..ce9d1c2fa1 100644
--- a/src/test/regress/sql/timestamptz.sql
+++ b/src/test/regress/sql/timestamptz.sql
@@ -468,3 +468,14 @@ SELECT '2007-12-09 07:00:00 UTC'::timestamptz AT TIME ZONE 'VET';
SELECT '2007-12-09 07:00:01 UTC'::timestamptz AT TIME ZONE 'VET';
SELECT '2007-12-09 07:29:59 UTC'::timestamptz AT TIME ZONE 'VET';
SELECT '2007-12-09 07:30:00 UTC'::timestamptz AT TIME ZONE 'VET';
+
+--
+-- Test that the pg_timezone_names and pg_timezone_abbrevs views are
+-- more-or-less working. We can't test their contents in any great detail
+-- without the outputs changing anytime IANA updates the underlying data,
+-- but it seems reasonable to expect at least one entry per major meridian.
+-- (At the time of writing, the actual counts are around 38 because of
+-- zones using fractional GMT offsets, so this is a pretty loose test.)
+--
+select count(distinct utc_offset) >= 24 as ok from pg_timezone_names;
+select count(distinct utc_offset) >= 24 as ok from pg_timezone_abbrevs;
--
cgit v1.2.3
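A short SQL sketch of the new behavior (the abbreviation shown is only an
example; actual results depend on the installed tzdata and abbreviation
files):

-- An abbreviation defined via a time_zone_name is interpreted against that
-- zone, even if the IANA data no longer lists the abbreviation itself:
SELECT '2016-09-02 12:00:00 MSK'::timestamptz;
-- And the view now reports each such abbreviation's current meaning instead
-- of failing wholesale:
SELECT * FROM pg_timezone_abbrevs WHERE abbrev = 'MSK';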
From 0c40ab3a88edf654165e562deee0c303a6ebef5e Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Sat, 3 Sep 2016 16:19:11 +0100
Subject: Fix wording of logical decoding concepts
Be specific about the conditions under which we emit >1 copy of a message
Craig Ringer
---
doc/src/sgml/logicaldecoding.sgml | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/logicaldecoding.sgml b/doc/src/sgml/logicaldecoding.sgml
index c42082002e..484915d042 100644
--- a/doc/src/sgml/logicaldecoding.sgml
+++ b/doc/src/sgml/logicaldecoding.sgml
@@ -12,7 +12,6 @@
Changes are sent out in streams identified by logical replication slots.
- Each stream outputs each change exactly once.
@@ -204,8 +203,7 @@ $ pg_recvlogical -d postgres --slot test --drop-slot
In the context of logical replication, a slot represents a stream of
changes that can be replayed to a client in the order they were made on
the origin server. Each slot streams a sequence of changes from a single
- database, sending each change exactly once (except when peeking forward
- in the stream).
+ database.
@@ -221,6 +219,20 @@ $ pg_recvlogical -d postgres --slot test --drop-slot
independently of the connection using them and are crash-safe.
+
+ A logical slot will emit each change just once in normal operation.
+ The current position of each slot is persisted only at checkpoint, so in
+ the case of a crash the slot may return to an earlier LSN, which will
+ then cause recent changes to be resent when the server restarts.
+ Logical decoding clients are responsible for avoiding ill effects from
+ handling the same message more than once. Clients may wish to record
+ the last LSN they saw when decoding and skip over any repeated data or
+ (when using the replication protocol) request that decoding start from
+ that LSN rather than letting the server determine the start point.
+ The Replication Progress Tracking feature is designed for this purpose;
+ refer to replication origins.
+
+
Multiple independent slots may exist for a single database. Each slot has
its own state, allowing different consumers to receive changes from
--
cgit v1.2.3
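A minimal SQL sketch of the at-least-once semantics described above, using
the contrib test_decoding plugin (the slot name is arbitrary):

SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');
-- peek does not advance the slot, so the same changes can be read again:
SELECT * FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL);
-- get consumes them; after a crash the slot may regress to the position
-- saved at the last checkpoint, so clients should remember the last LSN
-- they applied and skip anything older:
SELECT * FROM pg_logical_slot_get_changes('test_slot', NULL, NULL);
SELECT pg_drop_replication_slot('test_slot');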
From 35250b6ad7a8ece5cfe54c0316c180df19f36c13 Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Sat, 3 Sep 2016 17:48:01 +0100
Subject: New recovery target recovery_target_lsn
Michael Paquier
---
doc/src/sgml/recovery-config.sgml | 24 +++++++--
src/backend/access/transam/recovery.conf.sample | 6 ++-
src/backend/access/transam/xlog.c | 70 +++++++++++++++++++++++++
src/include/access/xlog.h | 1 +
src/test/recovery/t/003_recovery_targets.pl | 28 ++++++++--
5 files changed, 120 insertions(+), 9 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 26af221745..de3fb10f5b 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -157,9 +157,10 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
By default, recovery will recover to the end of the WAL log. The
following parameters can be used to specify an earlier stopping point.
At most one of recovery_target>,
- recovery_target_name>, recovery_target_time>, or
- recovery_target_xid> can be used; if more than one of these
- is specified in the configuration file, the last entry will be used.
+ recovery_target_lsn>, recovery_target_name>,
+ recovery_target_time>, or recovery_target_xid>
+ can be used; if more than one of these is specified in the configuration
+ file, the last entry will be used.
@@ -232,6 +233,23 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
+
+
+ recovery_target_lsn (pg_lsn)
+
+ recovery_target_lsn> recovery parameter
+
+
+
+
+ This parameter specifies the LSN of the transaction log location up
+ to which recovery will proceed. The precise stopping point is also
+ influenced by . This
+ parameter is parsed using the system data type
+ pg_lsn>.
+
+
+
diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample
index b777400d03..7a16751541 100644
--- a/src/backend/access/transam/recovery.conf.sample
+++ b/src/backend/access/transam/recovery.conf.sample
@@ -67,8 +67,8 @@
# must set a recovery target.
#
# You may set a recovery target either by transactionId, by name,
-# or by timestamp. Recovery may either include or exclude the
-# transaction(s) with the recovery target value (ie, stop either
+# by timestamp or by WAL position (LSN). Recovery may either include or
+# exclude the transaction(s) with the recovery target value (ie, stop either
# just after or just before the given target, respectively).
#
#
@@ -78,6 +78,8 @@
#
#recovery_target_xid = ''
#
+#recovery_target_lsn = '' # e.g. '0/70006B8'
+#
#recovery_target_inclusive = true
#
#
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0b991bb91d..2189c22c64 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -67,6 +67,7 @@
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/memutils.h"
+#include "utils/pg_lsn.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
@@ -254,6 +255,7 @@ static RecoveryTargetAction recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE;
static TransactionId recoveryTargetXid;
static TimestampTz recoveryTargetTime;
static char *recoveryTargetName;
+static XLogRecPtr recoveryTargetLSN;
static int recovery_min_apply_delay = 0;
static TimestampTz recoveryDelayUntilTime;
@@ -275,6 +277,7 @@ static bool fast_promote = false;
*/
static TransactionId recoveryStopXid;
static TimestampTz recoveryStopTime;
+static XLogRecPtr recoveryStopLSN;
static char recoveryStopName[MAXFNAMELEN];
static bool recoveryStopAfter;
@@ -5078,6 +5081,23 @@ readRecoveryCommandFile(void)
(errmsg_internal("recovery_target_name = '%s'",
recoveryTargetName)));
}
+ else if (strcmp(item->name, "recovery_target_lsn") == 0)
+ {
+ recoveryTarget = RECOVERY_TARGET_LSN;
+
+ /*
+ * Convert the LSN string given by the user to XLogRecPtr form.
+ */
+ recoveryTargetLSN =
+ DatumGetLSN(DirectFunctionCall3(pg_lsn_in,
+ CStringGetDatum(item->value),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1)));
+ ereport(DEBUG2,
+ (errmsg_internal("recovery_target_lsn = '%X/%X'",
+ (uint32) (recoveryTargetLSN >> 32),
+ (uint32) recoveryTargetLSN)));
+ }
else if (strcmp(item->name, "recovery_target") == 0)
{
if (strcmp(item->value, "immediate") == 0)
@@ -5400,8 +5420,26 @@ recoveryStopsBefore(XLogReaderState *record)
recoveryStopAfter = false;
recoveryStopXid = InvalidTransactionId;
+ recoveryStopLSN = InvalidXLogRecPtr;
+ recoveryStopTime = 0;
+ recoveryStopName[0] = '\0';
+ return true;
+ }
+
+ /* Check if target LSN has been reached */
+ if (recoveryTarget == RECOVERY_TARGET_LSN &&
+ !recoveryTargetInclusive &&
+ record->ReadRecPtr >= recoveryTargetLSN)
+ {
+ recoveryStopAfter = false;
+ recoveryStopXid = InvalidTransactionId;
+ recoveryStopLSN = record->ReadRecPtr;
recoveryStopTime = 0;
recoveryStopName[0] = '\0';
+ ereport(LOG,
+ (errmsg("recovery stopping before WAL position (LSN) \"%X/%X\"",
+ (uint32) (recoveryStopLSN >> 32),
+ (uint32) recoveryStopLSN)));
return true;
}
@@ -5479,6 +5517,7 @@ recoveryStopsBefore(XLogReaderState *record)
recoveryStopAfter = false;
recoveryStopXid = recordXid;
recoveryStopTime = recordXtime;
+ recoveryStopLSN = InvalidXLogRecPtr;
recoveryStopName[0] = '\0';
if (isCommit)
@@ -5532,6 +5571,7 @@ recoveryStopsAfter(XLogReaderState *record)
{
recoveryStopAfter = true;
recoveryStopXid = InvalidTransactionId;
+ recoveryStopLSN = InvalidXLogRecPtr;
(void) getRecordTimestamp(record, &recoveryStopTime);
strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
@@ -5543,6 +5583,23 @@ recoveryStopsAfter(XLogReaderState *record)
}
}
+ /* Check if the target LSN has been reached */
+ if (recoveryTarget == RECOVERY_TARGET_LSN &&
+ recoveryTargetInclusive &&
+ record->ReadRecPtr >= recoveryTargetLSN)
+ {
+ recoveryStopAfter = true;
+ recoveryStopXid = InvalidTransactionId;
+ recoveryStopLSN = record->ReadRecPtr;
+ recoveryStopTime = 0;
+ recoveryStopName[0] = '\0';
+ ereport(LOG,
+ (errmsg("recovery stopping after WAL position (LSN) \"%X/%X\"",
+ (uint32) (recoveryStopLSN >> 32),
+ (uint32) recoveryStopLSN)));
+ return true;
+ }
+
if (rmid != RM_XACT_ID)
return false;
@@ -5598,6 +5655,7 @@ recoveryStopsAfter(XLogReaderState *record)
recoveryStopAfter = true;
recoveryStopXid = recordXid;
recoveryStopTime = recordXtime;
+ recoveryStopLSN = InvalidXLogRecPtr;
recoveryStopName[0] = '\0';
if (xact_info == XLOG_XACT_COMMIT ||
@@ -5629,6 +5687,7 @@ recoveryStopsAfter(XLogReaderState *record)
recoveryStopAfter = true;
recoveryStopXid = InvalidTransactionId;
recoveryStopTime = 0;
+ recoveryStopLSN = InvalidXLogRecPtr;
recoveryStopName[0] = '\0';
return true;
}
@@ -6055,6 +6114,11 @@ StartupXLOG(void)
ereport(LOG,
(errmsg("starting point-in-time recovery to \"%s\"",
recoveryTargetName)));
+ else if (recoveryTarget == RECOVERY_TARGET_LSN)
+ ereport(LOG,
+ (errmsg("starting point-in-time recovery to WAL position (LSN) \"%X/%X\"",
+ (uint32) (recoveryTargetLSN >> 32),
+ (uint32) recoveryTargetLSN)));
else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
ereport(LOG,
(errmsg("starting point-in-time recovery to earliest consistent point")));
@@ -7124,6 +7188,12 @@ StartupXLOG(void)
"%s %s\n",
recoveryStopAfter ? "after" : "before",
timestamptz_to_str(recoveryStopTime));
+ else if (recoveryTarget == RECOVERY_TARGET_LSN)
+ snprintf(reason, sizeof(reason),
+ "%s LSN %X/%X\n",
+ recoveryStopAfter ? "after" : "before",
+ (uint32) (recoveryStopLSN >> 32),
+ (uint32) recoveryStopLSN);
else if (recoveryTarget == RECOVERY_TARGET_NAME)
snprintf(reason, sizeof(reason),
"at restore point \"%s\"",
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 14b7f7f459..c9f332c908 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -83,6 +83,7 @@ typedef enum
RECOVERY_TARGET_XID,
RECOVERY_TARGET_TIME,
RECOVERY_TARGET_NAME,
+ RECOVERY_TARGET_LSN,
RECOVERY_TARGET_IMMEDIATE
} RecoveryTargetType;
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
index d1f6d78388..a82545bf6f 100644
--- a/src/test/recovery/t/003_recovery_targets.pl
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 7;
+use Test::More tests => 9;
# Create and test a standby from given backup, with a certain
# recovery target.
@@ -86,6 +86,16 @@ my $lsn4 =
$node_master->safe_psql('postgres',
"SELECT pg_create_restore_point('$recovery_name');");
+# And now for a recovery target LSN
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(4001,5000))");
+my $recovery_lsn = $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location()");
+my $lsn5 =
+ $node_master->safe_psql('postgres', "SELECT pg_current_xlog_location();");
+
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(5001,6000))");
+
# Force archiving of WAL file
$node_master->safe_psql('postgres', "SELECT pg_switch_xlog()");
@@ -102,6 +112,9 @@ test_recovery_standby('time', 'standby_3', $node_master, \@recovery_params,
@recovery_params = ("recovery_target_name = '$recovery_name'");
test_recovery_standby('name', 'standby_4', $node_master, \@recovery_params,
"4000", $lsn4);
+@recovery_params = ("recovery_target_lsn = '$recovery_lsn'");
+test_recovery_standby('LSN', 'standby_5', $node_master, \@recovery_params,
+ "5000", $lsn5);
# Multiple targets
# Last entry has priority (note that an array respects the order of items
@@ -111,16 +124,23 @@ test_recovery_standby('name', 'standby_4', $node_master, \@recovery_params,
"recovery_target_xid = '$recovery_txid'",
"recovery_target_time = '$recovery_time'");
test_recovery_standby('name + XID + time',
- 'standby_5', $node_master, \@recovery_params, "3000", $lsn3);
+ 'standby_6', $node_master, \@recovery_params, "3000", $lsn3);
@recovery_params = (
"recovery_target_time = '$recovery_time'",
"recovery_target_name = '$recovery_name'",
"recovery_target_xid = '$recovery_txid'");
test_recovery_standby('time + name + XID',
- 'standby_6', $node_master, \@recovery_params, "2000", $lsn2);
+ 'standby_7', $node_master, \@recovery_params, "2000", $lsn2);
@recovery_params = (
"recovery_target_xid = '$recovery_txid'",
"recovery_target_time = '$recovery_time'",
"recovery_target_name = '$recovery_name'");
test_recovery_standby('XID + time + name',
- 'standby_7', $node_master, \@recovery_params, "4000", $lsn4);
+ 'standby_8', $node_master, \@recovery_params, "4000", $lsn4);
+@recovery_params = (
+ "recovery_target_xid = '$recovery_txid'",
+ "recovery_target_time = '$recovery_time'",
+ "recovery_target_name = '$recovery_name'",
+ "recovery_target_lsn = '$recovery_lsn'",);
+test_recovery_standby('XID + time + name + LSN',
+ 'standby_9', $node_master, \@recovery_params, "5000", $lsn5);
--
cgit v1.2.3
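A sketch of how the new target is used in practice (the LSN value is
illustrative; pg_current_xlog_location() is the era-appropriate spelling):

-- On the primary, capture a stopping point:
SELECT pg_current_xlog_location();   -- e.g. 0/3000180
-- Then, in the standby's recovery.conf:
--   recovery_target_lsn = '0/3000180'
--   recovery_target_inclusive = true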
From 6591f4226c81104f7746da6a5c00519919c560ae Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 3 Sep 2016 15:29:03 -0400
Subject: Improve readability of the output of psql's \timing command.
In addition to the existing decimal-milliseconds output value,
display the same value in mm:ss.fff format if it exceeds one second.
Tack on hours and even days fields if the interval is large enough.
This avoids needing mental arithmetic to convert the values into
customary time units.
Corey Huinker, reviewed by Gerdan Santos; bikeshedding by many
Discussion:
---
doc/src/sgml/ref/psql-ref.sgml | 7 ++++--
src/bin/psql/common.c | 56 ++++++++++++++++++++++++++++++++++++++++--
2 files changed, 59 insertions(+), 4 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 8a66ce7983..4806e77be7 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -2789,8 +2789,11 @@ testdb=> \setenv LESS -imx4F\timing [ on | off ]
- Without parameter, toggles a display of how long each SQL statement
- takes, in milliseconds. With parameter, sets same.
+ With a parameter, turns the display of how long each SQL statement
+ takes on or off. Without a parameter, toggles the display between
+ on and off. The display is in milliseconds; intervals longer than
+ 1 second are also shown in minutes:seconds format, with hours and
+ days fields added if needed.
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index 7399950284..a7789dfa53 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -10,6 +10,7 @@
#include <ctype.h>
#include <limits.h>
+#include <math.h>
#include <signal.h>
#ifndef WIN32
#include <unistd.h>   /* for write() */
@@ -531,6 +532,57 @@ ClearOrSaveResult(PGresult *result)
}
+/*
+ * Print microtiming output. Always print raw milliseconds; if the interval
+ * is >= 1 second, also break it down into days/hours/minutes/seconds.
+ */
+static void
+PrintTiming(double elapsed_msec)
+{
+ double seconds;
+ double minutes;
+ double hours;
+ double days;
+
+ if (elapsed_msec < 1000.0)
+ {
+ /* This is the traditional (pre-v10) output format */
+ printf(_("Time: %.3f ms\n"), elapsed_msec);
+ return;
+ }
+
+ /*
+ * Note: we could print just seconds, in a format like %06.3f, when the
+ * total is less than 1min. But that's hard to interpret unless we tack
+ * on "s" or otherwise annotate it. Forcing the display to include
+ * minutes seems like a better solution.
+ */
+ seconds = elapsed_msec / 1000.0;
+ minutes = floor(seconds / 60.0);
+ seconds -= 60.0 * minutes;
+ if (minutes < 60.0)
+ {
+ printf(_("Time: %.3f ms (%02d:%06.3f)\n"),
+ elapsed_msec, (int) minutes, seconds);
+ return;
+ }
+
+ hours = floor(minutes / 60.0);
+ minutes -= 60.0 * hours;
+ if (hours < 24.0)
+ {
+ printf(_("Time: %.3f ms (%02d:%02d:%06.3f)\n"),
+ elapsed_msec, (int) hours, (int) minutes, seconds);
+ return;
+ }
+
+ days = floor(hours / 24.0);
+ hours -= 24.0 * days;
+ printf(_("Time: %.3f ms (%.0f d %02d:%02d:%06.3f)\n"),
+ elapsed_msec, days, (int) hours, (int) minutes, seconds);
+}
+
+
/*
* PSQLexec
*
@@ -679,7 +731,7 @@ PSQLexecWatch(const char *query, const printQueryOpt *opt)
/* Possible microtiming output */
if (pset.timing)
- printf(_("Time: %.3f ms\n"), elapsed_msec);
+ PrintTiming(elapsed_msec);
return 1;
}
@@ -1332,7 +1384,7 @@ SendQuery(const char *query)
/* Possible microtiming output */
if (pset.timing)
- printf(_("Time: %.3f ms\n"), elapsed_msec);
+ PrintTiming(elapsed_msec);
/* check for events that may occur during query execution */
--
cgit v1.2.3
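The new display is plain clock arithmetic on the existing milliseconds
figure; interval arithmetic reproduces the same breakdown (a sketch):

SELECT 65432.1 * interval '1 millisecond';    -- 00:01:05.4321
-- psql would print this as: Time: 65432.100 ms (01:05.432)
SELECT 100000000 * interval '1 millisecond';  -- 27:46:40, hours appear past 60 minutes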
From 5a072244919a92b2c757b2e3985191f02d674627 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 4 Sep 2016 13:19:54 -0400
Subject: Update release notes to mention need for ALTER EXTENSION UPDATE.
Maybe we ought to make pg_upgrade do this for you, but it won't happen
in 9.6, so call out the need for it as a migration consideration.
---
doc/src/sgml/release-9.6.sgml | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 895d88e768..578c3d1fdb 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -303,6 +303,26 @@ This commit is also listed under libpq and psql
+
+
+
+ Update extension functions to be marked parallel-safe where
+ appropriate (Andreas Karlsson)
+
+
+
+ Many of the standard extensions have been updated to allow their
+ functions to be executed within parallel query worker processes.
+ These changes will not take effect in
+ databases pg_upgrade>'d from prior versions unless
+ you apply ALTER EXTENSION UPDATE> to each such extension
+ (in each database of a cluster).
+
+
+
--
cgit v1.2.3
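One way to act on this advice, sketched in SQL (run in each database; the
query only generates the statements, and hstore is just an example):

SELECT 'ALTER EXTENSION ' || quote_ident(extname) || ' UPDATE;' FROM pg_extension;
-- feed the output back to psql, or update a single extension directly:
ALTER EXTENSION hstore UPDATE;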
From ec03f4121cec6cf885bf40d9dfb53b8368251e99 Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Mon, 5 Sep 2016 09:47:49 +0100
Subject: Document LSN acronym in WAL Internals
We previously didn't mention what an LSN actually was.
Simon Riggs and Michael Paquier
---
doc/src/sgml/acronyms.sgml | 10 ++++++++++
doc/src/sgml/wal.sgml | 16 ++++++++++++++++
2 files changed, 26 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/acronyms.sgml b/doc/src/sgml/acronyms.sgml
index 38f111ef9d..bf2273fa8a 100644
--- a/doc/src/sgml/acronyms.sgml
+++ b/doc/src/sgml/acronyms.sgml
@@ -380,6 +380,16 @@
+
+ LSN
+
+
+ Log Sequence Number, see pg_lsn>
+ and WAL Internals.
+
+
+
+
MSVC
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 503ea8a2a7..9ae6547721 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -724,6 +724,10 @@
WAL Internals
+
+ LSN
+
+
WAL is automatically enabled; no action is
required from the administrator except ensuring that the
@@ -732,6 +736,18 @@
linkend="wal-configuration">).
+
+ WAL records are appended to the WAL
+ logs as each new record is written. The insert position is described by
+ a Log Sequence Number (LSN) that is a byte offset into
+ the logs, increasing monotonically with each new record.
+ LSN values are returned as the datatype
+ pg_lsn>. Values can be
+ compared to calculate the volume of WAL data that
+ separates them, so they are used to measure the progress of replication
+ and recovery.
+
+
WAL logs are stored in the directory
pg_xlog under the data directory, as a set of
--
cgit v1.2.3
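Because an LSN is a byte offset, the pg_lsn type supports ordering and
subtraction, which is what makes the progress calculations above possible
(values here are illustrative):

SELECT '0/17035B8'::pg_lsn - '0/1690DA8'::pg_lsn AS wal_bytes_between;
SELECT pg_xlog_location_diff(pg_current_xlog_location(), '0/1690DA8'::pg_lsn);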
From 15bc038f9bcd1a9af3f625caffafc7c20322202d Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 5 Sep 2016 12:59:55 -0400
Subject: Relax transactional restrictions on ALTER TYPE ... ADD VALUE.
To prevent possibly breaking indexes on enum columns, we must keep
uncommitted enum values from getting stored in tables, unless we
can be sure that any such column is new in the current transaction.
Formerly, we enforced this by disallowing ALTER TYPE ... ADD VALUE
from being executed at all in a transaction block, unless the target
enum type had been created in the current transaction. This patch
removes that restriction, and instead insists that an uncommitted enum
value can't be referenced unless it belongs to an enum type created
in the same transaction as the value. Per discussion, this should be
a bit less onerous. It does require each function that could possibly
return a new enum value to SQL operations to check this restriction,
but there aren't so many of those that this seems unmaintainable.
Andrew Dunstan and Tom Lane
Discussion: <4075.1459088427@sss.pgh.pa.us>
---
doc/src/sgml/ref/alter_type.sgml | 6 ++-
src/backend/commands/typecmds.c | 21 +-------
src/backend/tcop/utility.c | 2 +-
src/backend/utils/adt/enum.c | 104 +++++++++++++++++++++++++++++++++++++
src/backend/utils/errcodes.txt | 1 +
src/include/commands/typecmds.h | 2 +-
src/test/regress/expected/enum.out | 65 +++++++++++++++++++----
src/test/regress/sql/enum.sql | 30 ++++++++---
8 files changed, 191 insertions(+), 40 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_type.sgml b/doc/src/sgml/ref/alter_type.sgml
index 9789881a5c..aec73f6285 100644
--- a/doc/src/sgml/ref/alter_type.sgml
+++ b/doc/src/sgml/ref/alter_type.sgml
@@ -266,8 +266,10 @@ ALTER TYPE name ADD VALUE [ IF NOT
Notes
- ALTER TYPE ... ADD VALUE> (the form that adds a new value to an
- enum type) cannot be executed inside a transaction block.
+ If ALTER TYPE ... ADD VALUE> (the form that adds a new value to
+ an enum type) is executed inside a transaction block, the new value cannot
+ be used until after the transaction has been committed, except in the case
+ that the enum type itself was created earlier in the same transaction.
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index ce04211067..8e7be78f65 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -1221,7 +1221,7 @@ DefineEnum(CreateEnumStmt *stmt)
* Adds a new label to an existing enum.
*/
ObjectAddress
-AlterEnum(AlterEnumStmt *stmt, bool isTopLevel)
+AlterEnum(AlterEnumStmt *stmt)
{
Oid enum_type_oid;
TypeName *typename;
@@ -1236,25 +1236,6 @@ AlterEnum(AlterEnumStmt *stmt, bool isTopLevel)
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for type %u", enum_type_oid);
- /*
- * Ordinarily we disallow adding values within transaction blocks, because
- * we can't cope with enum OID values getting into indexes and then having
- * their defining pg_enum entries go away. However, it's okay if the enum
- * type was created in the current transaction, since then there can be no
- * such indexes that wouldn't themselves go away on rollback. (We support
- * this case because pg_dump --binary-upgrade needs it.) We test this by
- * seeing if the pg_type row has xmin == current XID and is not
- * HEAP_UPDATED. If it is HEAP_UPDATED, we can't be sure whether the type
- * was created or only modified in this xact. So we are disallowing some
- * cases that could theoretically be safe; but fortunately pg_dump only
- * needs the simplest case.
- */
- if (HeapTupleHeaderGetXmin(tup->t_data) == GetCurrentTransactionId() &&
- !(tup->t_data->t_infomask & HEAP_UPDATED))
- /* safe to do inside transaction block */ ;
- else
- PreventTransactionChain(isTopLevel, "ALTER TYPE ... ADD");
-
/* Check it's an enum and check user has permission to ALTER the enum */
checkEnumOwner(tup);
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index ac50c2a03d..ac64135d5d 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1359,7 +1359,7 @@ ProcessUtilitySlow(Node *parsetree,
break;
case T_AlterEnumStmt: /* ALTER TYPE (enum) */
- address = AlterEnum((AlterEnumStmt *) parsetree, isTopLevel);
+ address = AlterEnum((AlterEnumStmt *) parsetree);
break;
case T_ViewStmt: /* CREATE VIEW */
diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c
index 135a54428a..47d5355027 100644
--- a/src/backend/utils/adt/enum.c
+++ b/src/backend/utils/adt/enum.c
@@ -19,6 +19,7 @@
#include "catalog/indexing.h"
#include "catalog/pg_enum.h"
#include "libpq/pqformat.h"
+#include "storage/procarray.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
@@ -31,6 +32,93 @@ static Oid enum_endpoint(Oid enumtypoid, ScanDirection direction);
static ArrayType *enum_range_internal(Oid enumtypoid, Oid lower, Oid upper);
+/*
+ * Disallow use of an uncommitted pg_enum tuple.
+ *
+ * We need to make sure that uncommitted enum values don't get into indexes.
+ * If they did, and if we then rolled back the pg_enum addition, we'd have
+ * broken the index because value comparisons will not work reliably without
+ * an underlying pg_enum entry. (Note that removal of the heap entry
+ * containing an enum value is not sufficient to ensure that it doesn't appear
+ * in upper levels of indexes.) To do this we prevent an uncommitted row from
+ * being used for any SQL-level purpose. This is stronger than necessary,
+ * since the value might not be getting inserted into a table or there might
+ * be no index on its column, but it's easy to enforce centrally.
+ *
+ * However, it's okay to allow use of uncommitted values belonging to enum
+ * types that were themselves created in the same transaction, because then
+ * any such index would also be new and would go away altogether on rollback.
+ * (This case is required by pg_upgrade.)
+ *
+ * This function needs to be called (directly or indirectly) in any of the
+ * functions below that could return an enum value to SQL operations.
+ */
+static void
+check_safe_enum_use(HeapTuple enumval_tup)
+{
+ TransactionId xmin;
+ Form_pg_enum en;
+ HeapTuple enumtyp_tup;
+
+ /*
+ * If the row is hinted as committed, it's surely safe. This provides a
+ * fast path for all normal use-cases.
+ */
+ if (HeapTupleHeaderXminCommitted(enumval_tup->t_data))
+ return;
+
+ /*
+ * Usually, a row would get hinted as committed when it's read or loaded
+ * into syscache; but just in case not, let's check the xmin directly.
+ */
+ xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data);
+ if (!TransactionIdIsInProgress(xmin) &&
+ TransactionIdDidCommit(xmin))
+ return;
+
+ /* It is a new enum value, so check to see if the whole enum is new */
+ en = (Form_pg_enum) GETSTRUCT(enumval_tup);
+ enumtyp_tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(en->enumtypid));
+ if (!HeapTupleIsValid(enumtyp_tup))
+ elog(ERROR, "cache lookup failed for type %u", en->enumtypid);
+
+ /*
+ * We insist that the type have been created in the same (sub)transaction
+ * as the enum value. It would be safe to allow the type's originating
+ * xact to be a subcommitted child of the enum value's xact, but not vice
+ * versa (since we might now be in a subxact of the type's originating
+ * xact, which could roll back along with the enum value's subxact). The
+ * former case seems a sufficiently weird usage pattern as to not be worth
+ * spending code for, so we're left with a simple equality check.
+ *
+ * We also insist that the type's pg_type row not be HEAP_UPDATED. If it
+ * is, we can't tell whether the row was created or only modified in the
+ * apparent originating xact, so it might be older than that xact. (We do
+ * not worry whether the enum value is HEAP_UPDATED; if it is, we might
+ * think it's too new and throw an unnecessary error, but we won't allow
+ * an unsafe case.)
+ */
+ if (xmin == HeapTupleHeaderGetXmin(enumtyp_tup->t_data) &&
+ !(enumtyp_tup->t_data->t_infomask & HEAP_UPDATED))
+ {
+ /* same (sub)transaction, so safe */
+ ReleaseSysCache(enumtyp_tup);
+ return;
+ }
+
+ /*
+ * There might well be other tests we could do here to narrow down the
+ * unsafe conditions, but for now just raise an exception.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNSAFE_NEW_ENUM_VALUE_USAGE),
+ errmsg("unsafe use of new value \"%s\" of enum type %s",
+ NameStr(en->enumlabel),
+ format_type_be(en->enumtypid)),
+ errhint("New enum values must be committed before they can be used.")));
+}
+
+
/* Basic I/O support */
Datum
@@ -59,6 +147,9 @@ enum_in(PG_FUNCTION_ARGS)
format_type_be(enumtypoid),
name)));
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(tup);
+
/*
* This comes from pg_enum.oid and stores system oids in user tables. This
* oid must be preserved by binary upgrades.
@@ -124,6 +215,9 @@ enum_recv(PG_FUNCTION_ARGS)
format_type_be(enumtypoid),
name)));
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(tup);
+
enumoid = HeapTupleGetOid(tup);
ReleaseSysCache(tup);
@@ -327,9 +421,16 @@ enum_endpoint(Oid enumtypoid, ScanDirection direction)
enum_tuple = systable_getnext_ordered(enum_scan, direction);
if (HeapTupleIsValid(enum_tuple))
+ {
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(enum_tuple);
minmax = HeapTupleGetOid(enum_tuple);
+ }
else
+ {
+ /* should only happen with an empty enum */
minmax = InvalidOid;
+ }
systable_endscan_ordered(enum_scan);
index_close(enum_idx, AccessShareLock);
@@ -490,6 +591,9 @@ enum_range_internal(Oid enumtypoid, Oid lower, Oid upper)
if (left_found)
{
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(enum_tuple);
+
if (cnt >= max)
{
max *= 2;
diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt
index be924d58bd..e7bdb925ac 100644
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@@ -398,6 +398,7 @@ Section: Class 55 - Object Not In Prerequisite State
55006 E ERRCODE_OBJECT_IN_USE object_in_use
55P02 E ERRCODE_CANT_CHANGE_RUNTIME_PARAM cant_change_runtime_param
55P03 E ERRCODE_LOCK_NOT_AVAILABLE lock_not_available
+55P04 E ERRCODE_UNSAFE_NEW_ENUM_VALUE_USAGE unsafe_new_enum_value_usage
Section: Class 57 - Operator Intervention
diff --git a/src/include/commands/typecmds.h b/src/include/commands/typecmds.h
index e4c86f1b1d..847b770f00 100644
--- a/src/include/commands/typecmds.h
+++ b/src/include/commands/typecmds.h
@@ -26,7 +26,7 @@ extern void RemoveTypeById(Oid typeOid);
extern ObjectAddress DefineDomain(CreateDomainStmt *stmt);
extern ObjectAddress DefineEnum(CreateEnumStmt *stmt);
extern ObjectAddress DefineRange(CreateRangeStmt *stmt);
-extern ObjectAddress AlterEnum(AlterEnumStmt *stmt, bool isTopLevel);
+extern ObjectAddress AlterEnum(AlterEnumStmt *stmt);
extern ObjectAddress DefineCompositeType(RangeVar *typevar, List *coldeflist);
extern Oid AssignTypeArrayOid(void);
diff --git a/src/test/regress/expected/enum.out b/src/test/regress/expected/enum.out
index 1a61a5b0df..d4a45a306b 100644
--- a/src/test/regress/expected/enum.out
+++ b/src/test/regress/expected/enum.out
@@ -560,25 +560,72 @@ DROP TYPE bogus;
-- check transactional behaviour of ALTER TYPE ... ADD VALUE
--
CREATE TYPE bogus AS ENUM('good');
--- check that we can't add new values to existing enums in a transaction
+-- check that we can add new values to existing enums in a transaction
+-- but we can't use them
BEGIN;
-ALTER TYPE bogus ADD VALUE 'bad';
-ERROR: ALTER TYPE ... ADD cannot run inside a transaction block
+ALTER TYPE bogus ADD VALUE 'new';
+SAVEPOINT x;
+SELECT 'new'::bogus; -- unsafe
+ERROR: unsafe use of new value "new" of enum type bogus
+LINE 1: SELECT 'new'::bogus;
+ ^
+HINT: New enum values must be committed before they can be used.
+ROLLBACK TO x;
+SELECT enum_first(null::bogus); -- safe
+ enum_first
+------------
+ good
+(1 row)
+
+SELECT enum_last(null::bogus); -- unsafe
+ERROR: unsafe use of new value "new" of enum type bogus
+HINT: New enum values must be committed before they can be used.
+ROLLBACK TO x;
+SELECT enum_range(null::bogus); -- unsafe
+ERROR: unsafe use of new value "new" of enum type bogus
+HINT: New enum values must be committed before they can be used.
+ROLLBACK TO x;
COMMIT;
+SELECT 'new'::bogus; -- now safe
+ bogus
+-------
+ new
+(1 row)
+
+SELECT enumlabel, enumsortorder
+FROM pg_enum
+WHERE enumtypid = 'bogus'::regtype
+ORDER BY 2;
+ enumlabel | enumsortorder
+-----------+---------------
+ good | 1
+ new | 2
+(2 rows)
+
-- check that we recognize the case where the enum already existed but was
--- modified in the current txn
+-- modified in the current txn; this should not be considered safe
BEGIN;
ALTER TYPE bogus RENAME TO bogon;
ALTER TYPE bogon ADD VALUE 'bad';
-ERROR: ALTER TYPE ... ADD cannot run inside a transaction block
+SELECT 'bad'::bogon;
+ERROR: unsafe use of new value "bad" of enum type bogon
+LINE 1: SELECT 'bad'::bogon;
+ ^
+HINT: New enum values must be committed before they can be used.
ROLLBACK;
DROP TYPE bogus;
--- check that we *can* add new values to existing enums in a transaction,
--- if the type is new as well
+-- check that we can add new values to existing enums in a transaction
+-- and use them, if the type is new as well
BEGIN;
-CREATE TYPE bogus AS ENUM();
-ALTER TYPE bogus ADD VALUE 'good';
+CREATE TYPE bogus AS ENUM('good');
+ALTER TYPE bogus ADD VALUE 'bad';
ALTER TYPE bogus ADD VALUE 'ugly';
+SELECT enum_range(null::bogus);
+ enum_range
+-----------------
+ {good,bad,ugly}
+(1 row)
+
ROLLBACK;
--
-- Cleanup
diff --git a/src/test/regress/sql/enum.sql b/src/test/regress/sql/enum.sql
index 88a835e8aa..d25e8dedb6 100644
--- a/src/test/regress/sql/enum.sql
+++ b/src/test/regress/sql/enum.sql
@@ -262,26 +262,42 @@ DROP TYPE bogus;
--
CREATE TYPE bogus AS ENUM('good');
--- check that we can't add new values to existing enums in a transaction
+-- check that we can add new values to existing enums in a transaction
+-- but we can't use them
BEGIN;
-ALTER TYPE bogus ADD VALUE 'bad';
+ALTER TYPE bogus ADD VALUE 'new';
+SAVEPOINT x;
+SELECT 'new'::bogus; -- unsafe
+ROLLBACK TO x;
+SELECT enum_first(null::bogus); -- safe
+SELECT enum_last(null::bogus); -- unsafe
+ROLLBACK TO x;
+SELECT enum_range(null::bogus); -- unsafe
+ROLLBACK TO x;
COMMIT;
+SELECT 'new'::bogus; -- now safe
+SELECT enumlabel, enumsortorder
+FROM pg_enum
+WHERE enumtypid = 'bogus'::regtype
+ORDER BY 2;
-- check that we recognize the case where the enum already existed but was
--- modified in the current txn
+-- modified in the current txn; this should not be considered safe
BEGIN;
ALTER TYPE bogus RENAME TO bogon;
ALTER TYPE bogon ADD VALUE 'bad';
+SELECT 'bad'::bogon;
ROLLBACK;
DROP TYPE bogus;
--- check that we *can* add new values to existing enums in a transaction,
--- if the type is new as well
+-- check that we can add new values to existing enums in a transaction
+-- and use them, if the type is new as well
BEGIN;
-CREATE TYPE bogus AS ENUM();
-ALTER TYPE bogus ADD VALUE 'good';
+CREATE TYPE bogus AS ENUM('good');
+ALTER TYPE bogus ADD VALUE 'bad';
ALTER TYPE bogus ADD VALUE 'ugly';
+SELECT enum_range(null::bogus);
ROLLBACK;
--
--
cgit v1.2.3
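The regression cases above condense to the following standalone sketch
(assuming, as in the tests, that enum type bogus already exists; other names
are arbitrary):

BEGIN;
ALTER TYPE bogus ADD VALUE 'new';  -- now allowed inside a transaction block
SELECT 'new'::bogus;               -- ERROR 55P04: unsafe use of new value "new"
ROLLBACK;

BEGIN;
CREATE TYPE fresh AS ENUM ('one');
ALTER TYPE fresh ADD VALUE 'two';
SELECT 'two'::fresh;               -- allowed: the type is new in this transaction
ROLLBACK;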
From 975768f8eae2581b89ceafe8b16a77ff375207fe Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 6 Sep 2016 17:50:53 -0400
Subject: Doc: small improvements for documentation about VACUUM freezing.
Mostly, explain how row xmin's used to be replaced by FrozenTransactionId
and no longer are. Do a little copy-editing on the side.
Per discussion with Egor Rogov. Back-patch to 9.4 where the behavioral
change occurred.
Discussion: <575D7955.6060209@postgrespro.ru>
---
doc/src/sgml/maintenance.sgml | 48 +++++++++++++++++++++++++++++++------------
1 file changed, 35 insertions(+), 13 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index 2713883019..f87f3e00de 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -389,7 +389,8 @@
- PostgreSQL's MVCC transaction semantics
+ PostgreSQL's
+ MVCC transaction semantics
depend on being able to compare transaction ID (XID>)
numbers: a row version with an insertion XID greater than the current
transaction's XID is in the future> and should not be visible
@@ -407,13 +408,10 @@
The reason that periodic vacuuming solves the problem is that
VACUUM> will mark rows as frozen>, indicating that
- they were inserted by a transaction which committed sufficiently far in
- the past that the effects of the inserting transaction is certain to be
- visible, from an MVCC perspective, to all current and future transactions.
- PostgreSQL> reserves a special XID,
- FrozenTransactionId>, which does not follow the normal XID
- comparison rules and is always considered older
- than every normal XID. Normal XIDs are
+ they were inserted by a transaction that committed sufficiently far in
+ the past that the effects of the inserting transaction are certain to be
+ visible to all current and future transactions.
+ Normal XIDs are
compared using modulo-2^32 arithmetic. This means
that for every normal XID, there are two billion XIDs that are
older> and two billion that are newer>; another
@@ -423,16 +421,40 @@
the next two billion transactions, no matter which normal XID we are
talking about. If the row version still exists after more than two billion
transactions, it will suddenly appear to be in the future. To
- prevent this, frozen row versions are treated as if the inserting XID were
+ prevent this, PostgreSQL> reserves a special XID,
+ FrozenTransactionId>, which does not follow the normal XID
+ comparison rules and is always considered older
+ than every normal XID.
+ Frozen row versions are treated as if the inserting XID were
FrozenTransactionId>, so that they will appear to be
in the past> to all normal transactions regardless of wraparound
issues, and so such row versions will be valid until deleted, no matter
how long that is.
+
+
+ In PostgreSQL> versions before 9.4, freezing was
+ implemented by actually replacing a row's insertion XID
+ with FrozenTransactionId>, which was visible in the
+ row's xmin> system column. Newer versions just set a flag
+ bit, preserving the row's original xmin> for possible
+ forensic use. However, rows with xmin> equal
+ to FrozenTransactionId> (2) may still be found
+ in databases pg_upgrade>'d from pre-9.4 versions.
+
+
+ Also, system catalogs may contain rows with xmin> equal
+ to BootstrapTransactionId> (1), indicating that they were
+ inserted during the first phase of initdb>.
+ Like FrozenTransactionId>, this special XID is treated as
+ older than every normal XID.
+
+
+
- controls how old an XID value has to be before its row version will be
+ controls how old an XID value has to be before rows bearing that XID will be
frozen. Increasing this setting may avoid unnecessary work if the
rows that would otherwise be frozen will soon be modified again,
but decreasing this setting increases
@@ -442,10 +464,10 @@
VACUUM> uses the visibility map>
- to determine which pages of a relation must be scanned. Normally, it
- will skips pages that don't have any dead row versions even if those pages
+ to determine which pages of a table must be scanned. Normally, it
+ will skip pages that don't have any dead row versions even if those pages
might still have row versions with old XID values. Therefore, normal
- scans won't succeed in freezing every row version in the table.
+ VACUUM>s won't always freeze every old row version in the table.
Periodically, VACUUM> will perform an aggressive
vacuum>, skipping only those pages which contain neither dead rows nor
any unfrozen XID or MXID values.
--
cgit v1.2.3
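The flag-based freezing is easy to observe from SQL; a quick sketch (the
table name is arbitrary and xmin values will vary):

CREATE TABLE freeze_demo AS SELECT 1 AS i;
SELECT xmin FROM freeze_demo;   -- the inserting transaction's XID
VACUUM FREEZE freeze_demo;
SELECT xmin FROM freeze_demo;   -- on 9.4 and later, still the original XID, not 2
DROP TABLE freeze_demo;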
From bd180b607927c7757af17cd6fce0e545e5c48584 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 7 Sep 2016 13:36:08 -0400
Subject: Doc: minor documentation improvements about extensions.
Document the formerly-undocumented behavior that schema and comment
control-file entries for an extension are honored only during initial
installation, whereas other properties are also honored during updates.
While at it, do some copy-editing on the recently-added docs for CREATE
EXTENSION ... CASCADE, use links for some formerly vague cross references,
and make a couple other minor improvements.
Back-patch to 9.6 where CASCADE was added. The other parts of this
could go further back, but they're probably not important enough to
bother.
---
doc/src/sgml/extend.sgml | 32 ++++++++++++--------
doc/src/sgml/ref/create_extension.sgml | 54 ++++++++++++++--------------------
2 files changed, 42 insertions(+), 44 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml
index f050ff1f66..df88380a23 100644
--- a/doc/src/sgml/extend.sgml
+++ b/doc/src/sgml/extend.sgml
@@ -335,11 +335,13 @@
by pg_dump>. Such a change is usually only sensible if
you concurrently make the same change in the extension's script file.
(But there are special provisions for tables containing configuration
- data; see below.)
+ data; see .)
+ In production situations, it's generally better to create an extension
+ update script to perform changes to extension member objects.
- The extension script may set privileges on objects which are part of the
+ The extension script may set privileges on objects that are part of the
extension via GRANT and REVOKE
statements. The final set of privileges for each object (if any are set)
will be stored in the
@@ -453,9 +455,11 @@
comment (string)
- A comment (any string) about the extension. Alternatively,
- the comment can be set by means of the
- command in the script file.
+ A comment (any string) about the extension. The comment is applied
+ when initially creating an extension, but not during extension updates
+ (since that might override user-added comments). Alternatively,
+ the extension's comment can be set by writing
+ a command in the script file.
@@ -518,7 +522,7 @@
its contained objects into a different schema after initial creation
of the extension. The default is false>, i.e. the
extension is not relocatable.
- See below for more information.
+ See for more information.
@@ -529,7 +533,10 @@
This parameter can only be set for non-relocatable extensions.
It forces the extension to be loaded into exactly the named schema
- and not any other. See below for more information.
+ and not any other.
+ The schema parameter is consulted only when
+ initially creating an extension, not during extension updates.
+ See for more information.
@@ -562,7 +569,8 @@
comments) by the extension mechanism. This provision is commonly used
to throw an error if the script file is fed to psql>
rather than being loaded via CREATE EXTENSION> (see example
- script below). Without that, users might accidentally load the
+ script in ).
+ Without that, users might accidentally load the
extension's contents as loose> objects rather than as an
extension, a state of affairs that's a bit tedious to recover from.
@@ -580,7 +588,7 @@
-
+ Extension Relocatability
@@ -678,7 +686,7 @@ SET LOCAL search_path TO @extschema@;
-
+ Extension Configuration Tables
@@ -762,7 +770,7 @@ SELECT pg_catalog.pg_extension_config_dump('my_config', 'WHERE NOT standard_entr
out but the dump will not be able to be restored directly and user
intervention will be required.
-
+
Sequences associated with serial> or bigserial> columns
need to be directly marked to dump their state. Marking their parent
@@ -877,7 +885,7 @@ SELECT * FROM pg_extension_update_paths('extension_name>');
-
+ Extension Example
diff --git a/doc/src/sgml/ref/create_extension.sgml b/doc/src/sgml/ref/create_extension.sgml
index 007d8c9330..14e910115a 100644
--- a/doc/src/sgml/ref/create_extension.sgml
+++ b/doc/src/sgml/ref/create_extension.sgml
@@ -95,35 +95,21 @@ CREATE EXTENSION [ IF NOT EXISTS ] extension_name
If not specified, and the extension's control file does not specify a
schema either, the current default object creation schema is used.
+
- If the extension specifies schema> in its control file,
- the schema cannot be overridden with SCHEMA> clause.
- The SCHEMA> clause in this case works as follows:
-
-
-
- If schema_name matches
- the schema in control file, it will be used normally as there is no
- conflict.
-
-
-
-
- If the CASCADE> clause is given, the
- schema_name will only
- be used for the missing required extensions which do not specify
- schema> in their control files.
-
-
-
-
- If schema_name is not
- the same as the one in extension's control file and the
- CASCADE> clause is not given, error will be thrown.
-
-
-
+ If the extension specifies a schema> parameter in its
+ control file, then that schema cannot be overridden with
+ a SCHEMA> clause. Normally, an error will be raised if
+ a SCHEMA> clause is given and it conflicts with the
+ extension's schema> parameter. However, if
+ the CASCADE> clause is also given,
+ then schema_name is
+ ignored when it conflicts. The
+ given schema_name will be
+ used for installation of any needed extensions that do not
+ specify schema> in their control files.
+
Remember that the extension itself is not considered to be within any
schema: extensions have unqualified names that must be unique
@@ -147,7 +133,8 @@ CREATE EXTENSION [ IF NOT EXISTS ] extension_name
old_version
- FROM> old_version>
+
+ FROM> old_version>
must be specified when, and only when, you are attempting to install
an extension that replaces an old style> module that is just
a collection of objects not packaged into an extension. This option
@@ -174,10 +161,13 @@ CREATE EXTENSION [ IF NOT EXISTS ] extension_name
CASCADE>
- Try to install extension including the required dependencies
- recursively. The SCHEMA> option will be propagated
- to the required extensions. Other options are not recursively
- applied when using this clause.
+ Automatically install any extensions that this extension depends on
+ that are not already installed. Their dependencies are likewise
+ automatically installed, recursively. The SCHEMA> clause,
+ if given, applies to all extensions that get installed this way.
+ Other options of the statement are not applied to
+ automatically-installed extensions; in particular, their default
+ versions are always selected.
--
cgit v1.2.3
From 0ab9c56d0fe3acc9d4717a9cbac6ef3369275b90 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 7 Sep 2016 16:11:56 -0400
Subject: Support renaming an existing value of an enum type.
Not much to be said about this patch: it does what it says on the tin.
In passing, rename AlterEnumStmt.skipIfExists to skipIfNewValExists
to clarify what it actually does. In the discussion of this patch
we considered supporting other similar options, such as IF EXISTS
on the type as a whole or IF NOT EXISTS on the target name. This
patch doesn't actually add any such feature, but it might happen later.
Dagfinn Ilmari Mannsåker, reviewed by Emre Hasegeli
Discussion:
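A minimal usage sketch of the new syntax (the "mood" type here is
hypothetical, not taken from the patch's regression tests):

CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');

-- Rename one label; its position in the enum's sort order is unchanged.
ALTER TYPE mood RENAME VALUE 'ok' TO 'fine';

-- Outside an explicit transaction block each statement commits at once,
-- so the renamed label is immediately usable.
SELECT 'fine'::mood;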
---
doc/src/sgml/ref/alter_type.sgml | 68 ++++++++++++++++++++++--------
src/backend/catalog/pg_enum.c | 85 ++++++++++++++++++++++++++++++++++++++
src/backend/commands/typecmds.c | 20 ++++++---
src/backend/nodes/copyfuncs.c | 3 +-
src/backend/nodes/equalfuncs.c | 3 +-
src/backend/parser/gram.y | 20 +++++++--
src/include/catalog/pg_enum.h | 2 +
src/include/nodes/parsenodes.h | 3 +-
src/test/regress/expected/enum.out | 22 ++++++++++
src/test/regress/sql/enum.sql | 11 +++++
10 files changed, 208 insertions(+), 29 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_type.sgml b/doc/src/sgml/ref/alter_type.sgml
index aec73f6285..fdb4f3367d 100644
--- a/doc/src/sgml/ref/alter_type.sgml
+++ b/doc/src/sgml/ref/alter_type.sgml
@@ -28,7 +28,8 @@ ALTER TYPE name OWNER TO { name RENAME ATTRIBUTE attribute_name TO new_attribute_name [ CASCADE | RESTRICT ]
ALTER TYPE name RENAME TO new_name
ALTER TYPE name SET SCHEMA new_schema
-ALTER TYPE name ADD VALUE [ IF NOT EXISTS ] new_enum_value [ { BEFORE | AFTER } existing_enum_value ]
+ALTER TYPE name ADD VALUE [ IF NOT EXISTS ] new_enum_value [ { BEFORE | AFTER } neighbor_enum_value ]
+ALTER TYPE name RENAME VALUE existing_enum_value TO new_enum_value
where action is one of:
@@ -124,21 +125,13 @@ ALTER TYPE name ADD VALUE [ IF NOT
- CASCADE
+ RENAME VALUE
- Automatically propagate the operation to typed tables of the
- type being altered, and their descendants.
-
-
-
-
-
- RESTRICT
-
-
- Refuse the operation if the type being altered is the type of a
- typed table. This is the default.
+ This form renames a value of an enum type.
+ The value's place in the enum's ordering is not affected.
+ An error will occur if the specified value is not present or the new
+ name is already present.
@@ -241,14 +234,15 @@ ALTER TYPE name ADD VALUE [ IF NOT
new_enum_value
- The new value to be added to an enum type's list of values.
+ The new value to be added to an enum type's list of values,
+ or the new name to be given to an existing value.
Like all enum literals, it needs to be quoted.
- existing_enum_value
+ neighbor_enum_value
The existing enum value that the new value should be added immediately
@@ -258,6 +252,36 @@ ALTER TYPE name ADD VALUE [ IF NOT
+
+ existing_enum_value
+
+
+ The existing enum value that should be renamed.
+ Like all enum literals, it needs to be quoted.
+
+
+
+
+
+ CASCADE
+
+
+ Automatically propagate the operation to typed tables of the
+ type being altered, and their descendants.
+
+
+
+
+
+ RESTRICT
+
+
+ Refuse the operation if the type being altered is the type of a
+ typed table. This is the default.
+
+
+
+
@@ -270,6 +294,8 @@ ALTER TYPE name ADD VALUE [ IF NOT
an enum type) is executed inside a transaction block, the new value cannot
be used until after the transaction has been committed, except in the case
that the enum type itself was created earlier in the same transaction.
+ Likewise, when a pre-existing enum value is renamed, the transaction must
+ be committed before the renamed value can be used.
@@ -323,7 +349,15 @@ ALTER TYPE compfoo ADD ATTRIBUTE f3 int;
To add a new value to an enum type in a particular sort position:
ALTER TYPE colors ADD VALUE 'orange' AFTER 'red';
-
+
+
+
+
+ To rename an enum value:
+
+ALTER TYPE colors RENAME VALUE 'purple' TO 'mauve';
+
+
diff --git a/src/backend/catalog/pg_enum.c b/src/backend/catalog/pg_enum.c
index c66f9632c2..1f0ffcfa15 100644
--- a/src/backend/catalog/pg_enum.c
+++ b/src/backend/catalog/pg_enum.c
@@ -465,6 +465,91 @@ restart:
}
+/*
+ * RenameEnumLabel
+ * Rename a label in an enum set.
+ */
+void
+RenameEnumLabel(Oid enumTypeOid,
+ const char *oldVal,
+ const char *newVal)
+{
+ Relation pg_enum;
+ HeapTuple enum_tup;
+ Form_pg_enum en;
+ CatCList *list;
+ int nelems;
+ HeapTuple old_tup;
+ bool found_new;
+ int i;
+
+ /* check length of new label is ok */
+ if (strlen(newVal) > (NAMEDATALEN - 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid enum label \"%s\"", newVal),
+ errdetail("Labels must be %d characters or less.",
+ NAMEDATALEN - 1)));
+
+ /*
+ * Acquire a lock on the enum type, which we won't release until commit.
+ * This ensures that two backends aren't concurrently modifying the same
+ * enum type. Since we are not changing the type's sort order, this is
+ * probably not really necessary, but there seems no reason not to take
+ * the lock to be sure.
+ */
+ LockDatabaseObject(TypeRelationId, enumTypeOid, 0, ExclusiveLock);
+
+ pg_enum = heap_open(EnumRelationId, RowExclusiveLock);
+
+ /* Get the list of existing members of the enum */
+ list = SearchSysCacheList1(ENUMTYPOIDNAME,
+ ObjectIdGetDatum(enumTypeOid));
+ nelems = list->n_members;
+
+ /*
+ * Locate the element to rename and check if the new label is already in
+ * use. (The unique index on pg_enum would catch that anyway, but we
+ * prefer a friendlier error message.)
+ */
+ old_tup = NULL;
+ found_new = false;
+ for (i = 0; i < nelems; i++)
+ {
+ enum_tup = &(list->members[i]->tuple);
+ en = (Form_pg_enum) GETSTRUCT(enum_tup);
+ if (strcmp(NameStr(en->enumlabel), oldVal) == 0)
+ old_tup = enum_tup;
+ if (strcmp(NameStr(en->enumlabel), newVal) == 0)
+ found_new = true;
+ }
+ if (!old_tup)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("\"%s\" is not an existing enum label",
+ oldVal)));
+ if (found_new)
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("enum label \"%s\" already exists",
+ newVal)));
+
+ /* OK, make a writable copy of old tuple */
+ enum_tup = heap_copytuple(old_tup);
+ en = (Form_pg_enum) GETSTRUCT(enum_tup);
+
+ ReleaseCatCacheList(list);
+
+ /* Update the pg_enum entry */
+ namestrcpy(&en->enumlabel, newVal);
+ simple_heap_update(pg_enum, &enum_tup->t_self, enum_tup);
+ CatalogUpdateIndexes(pg_enum, enum_tup);
+ heap_freetuple(enum_tup);
+
+ heap_close(pg_enum, RowExclusiveLock);
+}
+
+
/*
* RenumberEnumType
* Renumber existing enum elements to have sort positions 1..n.
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 6cc7106467..41fd2dae7f 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -1241,17 +1241,25 @@ AlterEnum(AlterEnumStmt *stmt)
/* Check it's an enum and check user has permission to ALTER the enum */
checkEnumOwner(tup);
- /* Add the new label */
- AddEnumLabel(enum_type_oid, stmt->newVal,
- stmt->newValNeighbor, stmt->newValIsAfter,
- stmt->skipIfExists);
+ ReleaseSysCache(tup);
+
+ if (stmt->oldVal)
+ {
+ /* Rename an existing label */
+ RenameEnumLabel(enum_type_oid, stmt->oldVal, stmt->newVal);
+ }
+ else
+ {
+ /* Add a new label */
+ AddEnumLabel(enum_type_oid, stmt->newVal,
+ stmt->newValNeighbor, stmt->newValIsAfter,
+ stmt->skipIfNewValExists);
+ }
InvokeObjectPostAlterHook(TypeRelationId, enum_type_oid, 0);
ObjectAddressSet(address, TypeRelationId, enum_type_oid);
- ReleaseSysCache(tup);
-
return address;
}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index be2207e318..4f39dad66b 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3375,10 +3375,11 @@ _copyAlterEnumStmt(const AlterEnumStmt *from)
AlterEnumStmt *newnode = makeNode(AlterEnumStmt);
COPY_NODE_FIELD(typeName);
+ COPY_STRING_FIELD(oldVal);
COPY_STRING_FIELD(newVal);
COPY_STRING_FIELD(newValNeighbor);
COPY_SCALAR_FIELD(newValIsAfter);
- COPY_SCALAR_FIELD(skipIfExists);
+ COPY_SCALAR_FIELD(skipIfNewValExists);
return newnode;
}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index c4ec4077a6..4800165a91 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1465,10 +1465,11 @@ static bool
_equalAlterEnumStmt(const AlterEnumStmt *a, const AlterEnumStmt *b)
{
COMPARE_NODE_FIELD(typeName);
+ COMPARE_STRING_FIELD(oldVal);
COMPARE_STRING_FIELD(newVal);
COMPARE_STRING_FIELD(newValNeighbor);
COMPARE_SCALAR_FIELD(newValIsAfter);
- COMPARE_SCALAR_FIELD(skipIfExists);
+ COMPARE_SCALAR_FIELD(skipIfNewValExists);
return true;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index b69a77a588..1526c73a1c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -5257,30 +5257,44 @@ AlterEnumStmt:
{
AlterEnumStmt *n = makeNode(AlterEnumStmt);
n->typeName = $3;
+ n->oldVal = NULL;
n->newVal = $7;
n->newValNeighbor = NULL;
n->newValIsAfter = true;
- n->skipIfExists = $6;
+ n->skipIfNewValExists = $6;
$$ = (Node *) n;
}
| ALTER TYPE_P any_name ADD_P VALUE_P opt_if_not_exists Sconst BEFORE Sconst
{
AlterEnumStmt *n = makeNode(AlterEnumStmt);
n->typeName = $3;
+ n->oldVal = NULL;
n->newVal = $7;
n->newValNeighbor = $9;
n->newValIsAfter = false;
- n->skipIfExists = $6;
+ n->skipIfNewValExists = $6;
$$ = (Node *) n;
}
| ALTER TYPE_P any_name ADD_P VALUE_P opt_if_not_exists Sconst AFTER Sconst
{
AlterEnumStmt *n = makeNode(AlterEnumStmt);
n->typeName = $3;
+ n->oldVal = NULL;
n->newVal = $7;
n->newValNeighbor = $9;
n->newValIsAfter = true;
- n->skipIfExists = $6;
+ n->skipIfNewValExists = $6;
+ $$ = (Node *) n;
+ }
+ | ALTER TYPE_P any_name RENAME VALUE_P Sconst TO Sconst
+ {
+ AlterEnumStmt *n = makeNode(AlterEnumStmt);
+ n->typeName = $3;
+ n->oldVal = $6;
+ n->newVal = $8;
+ n->newValNeighbor = NULL;
+ n->newValIsAfter = false;
+ n->skipIfNewValExists = false;
$$ = (Node *) n;
}
;
diff --git a/src/include/catalog/pg_enum.h b/src/include/catalog/pg_enum.h
index dd32443b91..901d3adbb9 100644
--- a/src/include/catalog/pg_enum.h
+++ b/src/include/catalog/pg_enum.h
@@ -67,5 +67,7 @@ extern void EnumValuesDelete(Oid enumTypeOid);
extern void AddEnumLabel(Oid enumTypeOid, const char *newVal,
const char *neighbor, bool newValIsAfter,
bool skipIfExists);
+extern void RenameEnumLabel(Oid enumTypeOid,
+ const char *oldVal, const char *newVal);
#endif /* PG_ENUM_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 3716c2eef9..8d3dcf4d4c 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2708,10 +2708,11 @@ typedef struct AlterEnumStmt
{
NodeTag type;
List *typeName; /* qualified name (list of Value strings) */
+ char *oldVal; /* old enum value's name, if renaming */
char *newVal; /* new enum value's name */
char *newValNeighbor; /* neighboring enum value, if specified */
bool newValIsAfter; /* place new enum value after neighbor? */
- bool skipIfExists; /* no error if label already exists */
+ bool skipIfNewValExists; /* no error if new already exists? */
} AlterEnumStmt;
/* ----------------------
diff --git a/src/test/regress/expected/enum.out b/src/test/regress/expected/enum.out
index d4a45a306b..514d1d01a1 100644
--- a/src/test/regress/expected/enum.out
+++ b/src/test/regress/expected/enum.out
@@ -556,6 +556,28 @@ CREATE TABLE enumtest_bogus_child(parent bogus REFERENCES enumtest_parent);
ERROR: foreign key constraint "enumtest_bogus_child_parent_fkey" cannot be implemented
DETAIL: Key columns "parent" and "id" are of incompatible types: bogus and rainbow.
DROP TYPE bogus;
+-- check renaming a value
+ALTER TYPE rainbow RENAME VALUE 'red' TO 'crimson';
+SELECT enumlabel, enumsortorder
+FROM pg_enum
+WHERE enumtypid = 'rainbow'::regtype
+ORDER BY 2;
+ enumlabel | enumsortorder
+-----------+---------------
+ crimson | 1
+ orange | 2
+ yellow | 3
+ green | 4
+ blue | 5
+ purple | 6
+(6 rows)
+
+-- check that renaming a non-existent value fails
+ALTER TYPE rainbow RENAME VALUE 'red' TO 'crimson';
+ERROR: "red" is not an existing enum label
+-- check that renaming to an existent value fails
+ALTER TYPE rainbow RENAME VALUE 'blue' TO 'green';
+ERROR: enum label "green" already exists
--
-- check transactional behaviour of ALTER TYPE ... ADD VALUE
--
diff --git a/src/test/regress/sql/enum.sql b/src/test/regress/sql/enum.sql
index d25e8dedb6..d7e87143a0 100644
--- a/src/test/regress/sql/enum.sql
+++ b/src/test/regress/sql/enum.sql
@@ -257,6 +257,17 @@ CREATE TYPE bogus AS ENUM('good', 'bad', 'ugly');
CREATE TABLE enumtest_bogus_child(parent bogus REFERENCES enumtest_parent);
DROP TYPE bogus;
+-- check renaming a value
+ALTER TYPE rainbow RENAME VALUE 'red' TO 'crimson';
+SELECT enumlabel, enumsortorder
+FROM pg_enum
+WHERE enumtypid = 'rainbow'::regtype
+ORDER BY 2;
+-- check that renaming a non-existent value fails
+ALTER TYPE rainbow RENAME VALUE 'red' TO 'crimson';
+-- check that renaming to an existent value fails
+ALTER TYPE rainbow RENAME VALUE 'blue' TO 'green';
+
--
-- check transactional behaviour of ALTER TYPE ... ADD VALUE
--
--
cgit v1.2.3
From c9cf432ef32a9d29323b9b079178c1a6be126ff8 Mon Sep 17 00:00:00 2001
From: Bruce Momjian
Date: Wed, 7 Sep 2016 20:51:28 -0400
Subject: 9.6 release notes: correct summary item about freeze
Previously it talked less precisely about autovacuum.
Backpatch-through: 9.6
---
doc/src/sgml/release-9.6.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 578c3d1fdb..ddd280c85a 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -29,7 +29,7 @@
- Autovacuum no longer performs repetitive scanning of old data
+ Avoid scanning pages unnecessarily during vacuum freeze operations
--
cgit v1.2.3
From d299eb41dfc7b73dec80f22554b952f01c9d54a4 Mon Sep 17 00:00:00 2001
From: Noah Misch
Date: Thu, 8 Sep 2016 01:42:09 -0400
Subject: MSVC: Pass any user-set MSBFLAGS to MSBuild and VCBUILD.
This is particularly useful for passing /m to perform a parallel build.
Christian Ullrich, reviewed by Michael Paquier.
---
doc/src/sgml/install-windows.sgml | 8 ++++++++
src/tools/msvc/build.pl | 7 ++++---
src/tools/msvc/clean.bat | 2 +-
src/tools/msvc/vcregress.pl | 3 ++-
4 files changed, 15 insertions(+), 5 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/install-windows.sgml b/doc/src/sgml/install-windows.sgml
index 8cd189c8e1..50116f315d 100644
--- a/doc/src/sgml/install-windows.sgml
+++ b/doc/src/sgml/install-windows.sgml
@@ -145,6 +145,14 @@ $ENV{PATH}=$ENV{PATH} . ';c:\some\where\bison\bin';
+
+ To pass additional command line arguments to the Visual Studio build
+ command (msbuild or vcbuild):
+
+$ENV{MSBFLAGS}="/m";
+
+
+
Requirements
diff --git a/src/tools/msvc/build.pl b/src/tools/msvc/build.pl
index 007e3c73b2..52739774c7 100644
--- a/src/tools/msvc/build.pl
+++ b/src/tools/msvc/build.pl
@@ -38,6 +38,7 @@ my $vcver = Mkvcbuild::mkvcbuild($config);
# check what sort of build we are doing
my $bconf = $ENV{CONFIG} || "Release";
+my $msbflags = $ENV{MSBFLAGS} || "";
my $buildwhat = $ARGV[1] || "";
if (uc($ARGV[0]) eq 'DEBUG')
{
@@ -53,16 +54,16 @@ elsif (uc($ARGV[0]) ne "RELEASE")
if ($buildwhat and $vcver >= 10.00)
{
system(
- "msbuild $buildwhat.vcxproj /verbosity:normal /p:Configuration=$bconf"
+ "msbuild $buildwhat.vcxproj $msbflags /verbosity:normal /p:Configuration=$bconf"
);
}
elsif ($buildwhat)
{
- system("vcbuild $buildwhat.vcproj $bconf");
+ system("vcbuild $msbflags $buildwhat.vcproj $bconf");
}
else
{
- system("msbuild pgsql.sln /verbosity:normal /p:Configuration=$bconf");
+ system("msbuild pgsql.sln $msbflags /verbosity:normal /p:Configuration=$bconf");
}
# report status
diff --git a/src/tools/msvc/clean.bat b/src/tools/msvc/clean.bat
index 469b8a24b2..e21e37f6f7 100755
--- a/src/tools/msvc/clean.bat
+++ b/src/tools/msvc/clean.bat
@@ -107,6 +107,6 @@ REM for /r %%f in (*.sql) do if exist %%f.in del %%f
cd %D%
REM Clean up ecpg regression test files
-msbuild /NoLogo ecpg_regression.proj /t:clean /v:q
+msbuild %MSBFLAGS% /NoLogo ecpg_regression.proj /t:clean /v:q
goto :eof
diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl
index b4f946474f..bcf22677ac 100644
--- a/src/tools/msvc/vcregress.pl
+++ b/src/tools/msvc/vcregress.pl
@@ -138,8 +138,9 @@ sub check
sub ecpgcheck
{
+ my $msbflags = $ENV{MSBFLAGS} || "";
chdir $startdir;
- system("msbuild ecpg_regression.proj /p:config=$Config");
+ system("msbuild ecpg_regression.proj $msbflags /p:config=$Config");
my $status = $? >> 8;
exit $status if $status;
InstallTemp();
--
cgit v1.2.3
From f66472428a51fc484bc5ca81791924d06a6f096d Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Fri, 9 Sep 2016 11:19:21 +0100
Subject: Correct TABLESAMPLE docs
Revert to original use of word “sample”, though with clarification,
per Tom Lane.
Discussion: 29052.1471015383@sss.pgh.pa.us
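For example (a sketch only; "big_table" is a hypothetical table):

-- With the same seed, the same sample is read, assuming the table
-- has not been changed in between:
SELECT count(*) FROM big_table TABLESAMPLE BERNOULLI (10) REPEATABLE (42);
SELECT count(*) FROM big_table TABLESAMPLE BERNOULLI (10) REPEATABLE (42);

-- Without REPEATABLE, each execution draws a new sample based on a
-- system-generated seed:
SELECT count(*) FROM big_table TABLESAMPLE BERNOULLI (10);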
---
doc/src/sgml/ref/select.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index e0098eb8d3..211e4c320c 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -391,7 +391,7 @@ TABLE [ ONLY ] table_name [ * ]
not been changed meanwhile. But different seed values will usually
produce different samples.
If REPEATABLE is not given then a new random
- seed is selected for each query.
+ sample is selected for each query, based upon a system-generated seed.
Note that some add-on sampling methods do not
accept REPEATABLE, and will always produce new
samples on each use.
--
cgit v1.2.3
From 40b449ae84dcf71177d7749a7b0c582b64dc15f0 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 11 Sep 2016 14:15:07 -0400
Subject: Allow CREATE EXTENSION to follow extension update paths.
Previously, to update an extension you had to produce both a version-update
script and a new base installation script. It's become more and more
obvious that that's tedious, duplicative, and error-prone. This patch
attempts to improve matters by allowing the new base installation script
to be omitted. CREATE EXTENSION will install a requested version if it
can find a base script and a chain of update scripts that will get there.
As in the existing update logic, shorter chains are preferred if there's
more than one possibility, with an arbitrary tie-break rule for chains
of equal length.
Also adjust the pg_available_extension_versions view to show such versions
as installable.
While at it, refactor the code so that CASCADE processing works for
extensions requested during ApplyExtensionUpdates(). Without this,
addition of a new requirement in an updated extension would require
creating a new base script, even if there was no other reason to do that.
(It would be easy at this point to add a CASCADE option to ALTER EXTENSION
UPDATE, to allow the same thing to happen during a manually-commanded
version update, but I have not done that here.)
Tom Lane, reviewed by Andres Freund
Discussion: <20160905005919.jz2m2yh3und2dsuy@alap3.anarazel.de>
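A sketch of the new behavior, for a hypothetical extension foo whose
script directory contains only foo--1.0.sql, foo--1.0--1.1.sql, and
foo--1.1--1.2.sql (no foo--1.2.sql base script):

-- Runs the 1.0 install script, then the 1.0->1.1 and 1.1->1.2 updates:
CREATE EXTENSION foo VERSION '1.2';

-- Versions reachable only through update chains are now reported as
-- installable:
SELECT name, version FROM pg_available_extension_versions
WHERE name = 'foo';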
---
doc/src/sgml/extend.sgml | 41 +++
src/backend/commands/extension.c | 608 +++++++++++++++++++++++++--------------
2 files changed, 439 insertions(+), 210 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml
index df88380a23..e19c657d8f 100644
--- a/doc/src/sgml/extend.sgml
+++ b/doc/src/sgml/extend.sgml
@@ -885,6 +885,47 @@ SELECT * FROM pg_extension_update_paths('extension_name>');
+
+ Installing Extensions using Update Scripts
+
+
+ An extension that has been around for a while will probably exist in
+ several versions, for which the author will need to write update scripts.
+ For example, if you have released a foo> extension in
+ versions 1.0>, 1.1>, and 1.2>, there
+ should be update scripts foo--1.0--1.1.sql>
+ and foo--1.1--1.2.sql>.
+ Before PostgreSQL> 10, it was necessary to also create
+ new script files foo--1.1.sql> and foo--1.2.sql>
+ that directly build the newer extension versions, or else the newer
+ versions could not be installed directly, only by
+ installing 1.0> and then updating. That was tedious and
+ duplicative, but now it's unnecessary, because CREATE
+ EXTENSION> can follow update chains automatically.
+ For example, if only the script
+ files foo--1.0.sql>, foo--1.0--1.1.sql>,
+ and foo--1.1--1.2.sql> are available then a request to
+ install version 1.2> is honored by running those three
+ scripts in sequence. The processing is the same as if you'd first
+ installed 1.0> and then updated to 1.2>.
+ (As with ALTER EXTENSION UPDATE>, if multiple pathways are
+ available then the shortest is preferred.) Arranging an extension's
+ script files in this style can reduce the amount of maintenance effort
+ needed to produce small updates.
+
+
+
+ If you use secondary (version-specific) control files with an extension
+ maintained in this style, keep in mind that each version needs a control
+ file even if it has no stand-alone installation script, as that control
+ file will determine how the implicit update to that version is performed.
+ For example, if foo--1.0.control> specifies requires
+ = 'bar'> but foo>'s other control files do not, the
+ extension's dependency on bar> will be dropped when updating
+ from 1.0> to another version.
+
+
+
Extension Example
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index df49a78e2f..f6c2c8af91 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -100,14 +100,25 @@ typedef struct ExtensionVersionInfo
static List *find_update_path(List *evi_list,
ExtensionVersionInfo *evi_start,
ExtensionVersionInfo *evi_target,
+ bool reject_indirect,
bool reinitialize);
+static Oid get_required_extension(char *reqExtensionName,
+ char *extensionName,
+ char *origSchemaName,
+ bool cascade,
+ List *parents,
+ bool is_create);
static void get_available_versions_for_extension(ExtensionControlFile *pcontrol,
Tuplestorestate *tupstore,
TupleDesc tupdesc);
+static Datum convert_requires_to_datum(List *requires);
static void ApplyExtensionUpdates(Oid extensionOid,
ExtensionControlFile *pcontrol,
const char *initialVersion,
- List *updateVersions);
+ List *updateVersions,
+ char *origSchemaName,
+ bool cascade,
+ bool is_create);
static char *read_whole_file(const char *filename, int *length);
@@ -1071,7 +1082,7 @@ identify_update_path(ExtensionControlFile *control,
evi_target = get_ext_ver_info(newVersion, &evi_list);
/* Find shortest path */
- result = find_update_path(evi_list, evi_start, evi_target, false);
+ result = find_update_path(evi_list, evi_start, evi_target, false, false);
if (result == NIL)
ereport(ERROR,
@@ -1086,9 +1097,13 @@ identify_update_path(ExtensionControlFile *control,
* Apply Dijkstra's algorithm to find the shortest path from evi_start to
* evi_target.
*
+ * If reject_indirect is true, ignore paths that go through installable
+ * versions. This saves work when the caller will consider starting from
+ * all installable versions anyway.
+ *
* If reinitialize is false, assume the ExtensionVersionInfo list has not
* been used for this before, and the initialization done by get_ext_ver_info
- * is still good.
+ * is still good. Otherwise, reinitialize all transient fields used here.
*
* Result is a List of names of versions to transition through (the initial
* version is *not* included). Returns NIL if no such path.
@@ -1097,6 +1112,7 @@ static List *
find_update_path(List *evi_list,
ExtensionVersionInfo *evi_start,
ExtensionVersionInfo *evi_target,
+ bool reject_indirect,
bool reinitialize)
{
List *result;
@@ -1105,6 +1121,8 @@ find_update_path(List *evi_list,
/* Caller error if start == target */
Assert(evi_start != evi_target);
+ /* Caller error if reject_indirect and target is installable */
+ Assert(!(reject_indirect && evi_target->installable));
if (reinitialize)
{
@@ -1131,6 +1149,9 @@ find_update_path(List *evi_list,
ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc);
int newdist;
+ /* if reject_indirect, treat installable versions as unreachable */
+ if (reject_indirect && evi2->installable)
+ continue;
newdist = evi->distance + 1;
if (newdist < evi2->distance)
{
@@ -1166,6 +1187,67 @@ find_update_path(List *evi_list,
return result;
}
+/*
+ * Given a target version that is not directly installable, find the
+ * best installation sequence starting from a directly-installable version.
+ *
+ * evi_list: previously-collected version update graph
+ * evi_target: member of that list that we want to reach
+ *
+ * Returns the best starting-point version, or NULL if there is none.
+ * On success, *best_path is set to the path from the start point.
+ *
+ * If there's more than one possible start point, prefer shorter update paths,
+ * and break any ties arbitrarily on the basis of strcmp'ing the starting
+ * versions' names.
+ */
+static ExtensionVersionInfo *
+find_install_path(List *evi_list, ExtensionVersionInfo *evi_target,
+ List **best_path)
+{
+ ExtensionVersionInfo *evi_start = NULL;
+ ListCell *lc;
+
+ *best_path = NIL;
+
+ /*
+ * We don't expect to be called for an installable target, but if we are,
+ * the answer is easy: just start from there, with an empty update path.
+ */
+ if (evi_target->installable)
+ return evi_target;
+
+ /* Consider all installable versions as start points */
+ foreach(lc, evi_list)
+ {
+ ExtensionVersionInfo *evi1 = (ExtensionVersionInfo *) lfirst(lc);
+ List *path;
+
+ if (!evi1->installable)
+ continue;
+
+ /*
+ * Find shortest path from evi1 to evi_target; but no need to consider
+ * paths going through other installable versions.
+ */
+ path = find_update_path(evi_list, evi1, evi_target, true, true);
+ if (path == NIL)
+ continue;
+
+ /* Remember best path */
+ if (evi_start == NULL ||
+ list_length(path) < list_length(*best_path) ||
+ (list_length(path) == list_length(*best_path) &&
+ strcmp(evi_start->name, evi1->name) < 0))
+ {
+ evi_start = evi1;
+ *best_path = path;
+ }
+ }
+
+ return evi_start;
+}
+
/*
* CREATE EXTENSION worker
*
@@ -1175,17 +1257,16 @@ find_update_path(List *evi_list,
* installed, allowing us to error out if we recurse to one of those.
*/
static ObjectAddress
-CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *parents)
+CreateExtensionInternal(char *extensionName,
+ char *schemaName,
+ char *versionName,
+ char *oldVersionName,
+ bool cascade,
+ List *parents,
+ bool is_create)
{
- DefElem *d_schema = NULL;
- DefElem *d_new_version = NULL;
- DefElem *d_old_version = NULL;
- DefElem *d_cascade = NULL;
- char *schemaName = NULL;
+ char *origSchemaName = schemaName;
Oid schemaOid = InvalidOid;
- char *versionName;
- char *oldVersionName;
- bool cascade = false;
Oid extowner = GetUserId();
ExtensionControlFile *pcontrol;
ExtensionControlFile *control;
@@ -1193,87 +1274,43 @@ CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *par
List *requiredExtensions;
List *requiredSchemas;
Oid extensionOid;
- ListCell *lc;
ObjectAddress address;
+ ListCell *lc;
/*
* Read the primary control file. Note we assume that it does not contain
* any non-ASCII data, so there is no need to worry about encoding at this
* point.
*/
- pcontrol = read_extension_control_file(stmt->extname);
-
- /*
- * Read the statement option list
- */
- foreach(lc, stmt->options)
- {
- DefElem *defel = (DefElem *) lfirst(lc);
-
- if (strcmp(defel->defname, "schema") == 0)
- {
- if (d_schema)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting or redundant options"),
- parser_errposition(pstate, defel->location)));
- d_schema = defel;
- }
- else if (strcmp(defel->defname, "new_version") == 0)
- {
- if (d_new_version)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting or redundant options"),
- parser_errposition(pstate, defel->location)));
- d_new_version = defel;
- }
- else if (strcmp(defel->defname, "old_version") == 0)
- {
- if (d_old_version)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting or redundant options"),
- parser_errposition(pstate, defel->location)));
- d_old_version = defel;
- }
- else if (strcmp(defel->defname, "cascade") == 0)
- {
- if (d_cascade)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting or redundant options"),
- parser_errposition(pstate, defel->location)));
- d_cascade = defel;
- cascade = defGetBoolean(d_cascade);
- }
- else
- elog(ERROR, "unrecognized option: %s", defel->defname);
- }
+ pcontrol = read_extension_control_file(extensionName);
/*
* Determine the version to install
*/
- if (d_new_version && d_new_version->arg)
- versionName = strVal(d_new_version->arg);
- else if (pcontrol->default_version)
- versionName = pcontrol->default_version;
- else
+ if (versionName == NULL)
{
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("version to install must be specified")));
- versionName = NULL; /* keep compiler quiet */
+ if (pcontrol->default_version)
+ versionName = pcontrol->default_version;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("version to install must be specified")));
}
check_valid_version_name(versionName);
/*
- * Determine the (unpackaged) version to update from, if any, and then
- * figure out what sequence of update scripts we need to apply.
+ * Figure out which script(s) we need to run to install the desired
+ * version of the extension. If we do not have a script that directly
+ * does what is needed, we try to find a sequence of update scripts that
+ * will get us there.
*/
- if (d_old_version && d_old_version->arg)
+ if (oldVersionName)
{
- oldVersionName = strVal(d_old_version->arg);
+ /*
+ * "FROM old_version" was specified, indicating that we're trying to
+ * update from some unpackaged version of the extension. Locate a
+ * series of update scripts that will do it.
+ */
check_valid_version_name(oldVersionName);
if (strcmp(oldVersionName, versionName) == 0)
@@ -1308,8 +1345,48 @@ CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *par
}
else
{
+ /*
+ * No FROM, so we're installing from scratch. If there is an install
+ * script for the desired version, we only need to run that one.
+ */
+ char *filename;
+ struct stat fst;
+
oldVersionName = NULL;
- updateVersions = NIL;
+
+ filename = get_extension_script_filename(pcontrol, NULL, versionName);
+ if (stat(filename, &fst) == 0)
+ {
+ /* Easy, no extra scripts */
+ updateVersions = NIL;
+ }
+ else
+ {
+ /* Look for best way to install this version */
+ List *evi_list;
+ ExtensionVersionInfo *evi_start;
+ ExtensionVersionInfo *evi_target;
+
+ /* Extract the version update graph from the script directory */
+ evi_list = get_ext_ver_list(pcontrol);
+
+ /* Identify the target version */
+ evi_target = get_ext_ver_info(versionName, &evi_list);
+
+ /* Identify best path to reach target */
+ evi_start = find_install_path(evi_list, evi_target,
+ &updateVersions);
+
+ /* Fail if no path ... */
+ if (evi_start == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("extension \"%s\" has no installation script nor update path for version \"%s\"",
+ pcontrol->name, versionName)));
+
+ /* Otherwise, install best starting point and then upgrade */
+ versionName = evi_start->name;
+ }
}
/*
@@ -1320,13 +1397,8 @@ CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *par
/*
* Determine the target schema to install the extension into
*/
- if (d_schema && d_schema->arg)
+ if (schemaName)
{
- /*
- * User given schema, CREATE EXTENSION ... WITH SCHEMA ...
- */
- schemaName = strVal(d_schema->arg);
-
/* If the user is giving us the schema name, it must exist already. */
schemaOid = get_namespace_oid(schemaName, false);
}
@@ -1374,7 +1446,7 @@ CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *par
else if (!OidIsValid(schemaOid))
{
/*
- * Neither user nor author of the extension specified schema, use the
+ * Neither user nor author of the extension specified schema; use the
* current default creation namespace, which is the first explicit
* entry in the search_path.
*/
@@ -1415,66 +1487,12 @@ CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *par
Oid reqext;
Oid reqschema;
- reqext = get_extension_oid(curreq, true);
- if (!OidIsValid(reqext))
- {
- if (cascade)
- {
- /* Must install it. */
- CreateExtensionStmt *ces;
- ListCell *lc2;
- ObjectAddress addr;
- List *cascade_parents;
-
- /* Check extension name validity before trying to cascade. */
- check_valid_extension_name(curreq);
-
- /* Check for cyclic dependency between extensions. */
- foreach(lc2, parents)
- {
- char *pname = (char *) lfirst(lc2);
-
- if (strcmp(pname, curreq) == 0)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_RECURSION),
- errmsg("cyclic dependency detected between extensions \"%s\" and \"%s\"",
- curreq, stmt->extname)));
- }
-
- ereport(NOTICE,
- (errmsg("installing required extension \"%s\"",
- curreq)));
-
- /* Build a CREATE EXTENSION statement to pass down. */
- ces = makeNode(CreateExtensionStmt);
- ces->extname = curreq;
- ces->if_not_exists = false;
-
- /* Propagate the CASCADE option. */
- ces->options = list_make1(d_cascade);
-
- /* Propagate the SCHEMA option if given. */
- if (d_schema && d_schema->arg)
- ces->options = lappend(ces->options, d_schema);
-
- /* Add current extension to list of parents to pass down. */
- cascade_parents =
- lappend(list_copy(parents), stmt->extname);
-
- /* Create the required extension. */
- addr = CreateExtensionInternal(pstate, ces, cascade_parents);
-
- /* Get its newly-assigned OID. */
- reqext = addr.objectId;
- }
- else
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("required extension \"%s\" is not installed",
- curreq),
- errhint("Use CREATE EXTENSION ... CASCADE to install required extensions too.")));
- }
-
+ reqext = get_required_extension(curreq,
+ extensionName,
+ origSchemaName,
+ cascade,
+ parents,
+ is_create);
reqschema = get_extension_schema(reqext);
requiredExtensions = lappend_oid(requiredExtensions, reqext);
requiredSchemas = lappend_oid(requiredSchemas, reqschema);
@@ -1510,17 +1528,100 @@ CreateExtensionInternal(ParseState *pstate, CreateExtensionStmt *stmt, List *par
* though a series of ALTER EXTENSION UPDATE commands were given
*/
ApplyExtensionUpdates(extensionOid, pcontrol,
- versionName, updateVersions);
+ versionName, updateVersions,
+ origSchemaName, cascade, is_create);
return address;
}
+/*
+ * Get the OID of an extension listed in "requires", possibly creating it.
+ */
+static Oid
+get_required_extension(char *reqExtensionName,
+ char *extensionName,
+ char *origSchemaName,
+ bool cascade,
+ List *parents,
+ bool is_create)
+{
+ Oid reqExtensionOid;
+
+ reqExtensionOid = get_extension_oid(reqExtensionName, true);
+ if (!OidIsValid(reqExtensionOid))
+ {
+ if (cascade)
+ {
+ /* Must install it. */
+ ObjectAddress addr;
+ List *cascade_parents;
+ ListCell *lc;
+
+ /* Check extension name validity before trying to cascade. */
+ check_valid_extension_name(reqExtensionName);
+
+ /* Check for cyclic dependency between extensions. */
+ foreach(lc, parents)
+ {
+ char *pname = (char *) lfirst(lc);
+
+ if (strcmp(pname, reqExtensionName) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_RECURSION),
+ errmsg("cyclic dependency detected between extensions \"%s\" and \"%s\"",
+ reqExtensionName, extensionName)));
+ }
+
+ ereport(NOTICE,
+ (errmsg("installing required extension \"%s\"",
+ reqExtensionName)));
+
+ /* Add current extension to list of parents to pass down. */
+ cascade_parents = lappend(list_copy(parents), extensionName);
+
+ /*
+ * Create the required extension. We propagate the SCHEMA option
+ * if any, and CASCADE, but no other options.
+ */
+ addr = CreateExtensionInternal(reqExtensionName,
+ origSchemaName,
+ NULL,
+ NULL,
+ cascade,
+ cascade_parents,
+ is_create);
+
+ /* Get its newly-assigned OID. */
+ reqExtensionOid = addr.objectId;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("required extension \"%s\" is not installed",
+ reqExtensionName),
+ is_create ?
+ errhint("Use CREATE EXTENSION ... CASCADE to install required extensions too.") : 0));
+ }
+
+ return reqExtensionOid;
+}
+
/*
* CREATE EXTENSION
*/
ObjectAddress
CreateExtension(ParseState *pstate, CreateExtensionStmt *stmt)
{
+ DefElem *d_schema = NULL;
+ DefElem *d_new_version = NULL;
+ DefElem *d_old_version = NULL;
+ DefElem *d_cascade = NULL;
+ char *schemaName = NULL;
+ char *versionName = NULL;
+ char *oldVersionName = NULL;
+ bool cascade = false;
+ ListCell *lc;
+
/* Check extension name validity before any filesystem access */
check_valid_extension_name(stmt->extname);
@@ -1556,8 +1657,63 @@ CreateExtension(ParseState *pstate, CreateExtensionStmt *stmt)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("nested CREATE EXTENSION is not supported")));
- /* Finally create the extension. */
- return CreateExtensionInternal(pstate, stmt, NIL);
+ /* Deconstruct the statement option list */
+ foreach(lc, stmt->options)
+ {
+ DefElem *defel = (DefElem *) lfirst(lc);
+
+ if (strcmp(defel->defname, "schema") == 0)
+ {
+ if (d_schema)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options"),
+ parser_errposition(pstate, defel->location)));
+ d_schema = defel;
+ schemaName = defGetString(d_schema);
+ }
+ else if (strcmp(defel->defname, "new_version") == 0)
+ {
+ if (d_new_version)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options"),
+ parser_errposition(pstate, defel->location)));
+ d_new_version = defel;
+ versionName = defGetString(d_new_version);
+ }
+ else if (strcmp(defel->defname, "old_version") == 0)
+ {
+ if (d_old_version)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options"),
+ parser_errposition(pstate, defel->location)));
+ d_old_version = defel;
+ oldVersionName = defGetString(d_old_version);
+ }
+ else if (strcmp(defel->defname, "cascade") == 0)
+ {
+ if (d_cascade)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options"),
+ parser_errposition(pstate, defel->location)));
+ d_cascade = defel;
+ cascade = defGetBoolean(d_cascade);
+ }
+ else
+ elog(ERROR, "unrecognized option: %s", defel->defname);
+ }
+
+ /* Call CreateExtensionInternal to do the real work. */
+ return CreateExtensionInternal(stmt->extname,
+ schemaName,
+ versionName,
+ oldVersionName,
+ cascade,
+ NIL,
+ true);
}
/*
@@ -1914,43 +2070,28 @@ get_available_versions_for_extension(ExtensionControlFile *pcontrol,
Tuplestorestate *tupstore,
TupleDesc tupdesc)
{
- int extnamelen = strlen(pcontrol->name);
- char *location;
- DIR *dir;
- struct dirent *de;
+ List *evi_list;
+ ListCell *lc;
- location = get_extension_script_directory(pcontrol);
- dir = AllocateDir(location);
- /* Note this will fail if script directory doesn't exist */
- while ((de = ReadDir(dir, location)) != NULL)
+ /* Extract the version update graph from the script directory */
+ evi_list = get_ext_ver_list(pcontrol);
+
+ /* For each installable version ... */
+ foreach(lc, evi_list)
{
+ ExtensionVersionInfo *evi = (ExtensionVersionInfo *) lfirst(lc);
ExtensionControlFile *control;
- char *vername;
Datum values[7];
bool nulls[7];
+ ListCell *lc2;
- /* must be a .sql file ... */
- if (!is_extension_script_filename(de->d_name))
- continue;
-
- /* ... matching extension name followed by separator */
- if (strncmp(de->d_name, pcontrol->name, extnamelen) != 0 ||
- de->d_name[extnamelen] != '-' ||
- de->d_name[extnamelen + 1] != '-')
- continue;
-
- /* extract version name from 'extname--something.sql' filename */
- vername = pstrdup(de->d_name + extnamelen + 2);
- *strrchr(vername, '.') = '\0';
-
- /* ignore it if it's an update script */
- if (strstr(vername, "--"))
+ if (!evi->installable)
continue;
/*
* Fetch parameters for specific version (pcontrol is not changed)
*/
- control = read_extension_aux_control_file(pcontrol, vername);
+ control = read_extension_aux_control_file(pcontrol, evi->name);
memset(values, 0, sizeof(values));
memset(nulls, 0, sizeof(nulls));
@@ -1959,7 +2100,7 @@ get_available_versions_for_extension(ExtensionControlFile *pcontrol,
values[0] = DirectFunctionCall1(namein,
CStringGetDatum(control->name));
/* version */
- values[1] = CStringGetTextDatum(vername);
+ values[1] = CStringGetTextDatum(evi->name);
/* superuser */
values[2] = BoolGetDatum(control->superuser);
/* relocatable */
@@ -1974,27 +2115,7 @@ get_available_versions_for_extension(ExtensionControlFile *pcontrol,
if (control->requires == NIL)
nulls[5] = true;
else
- {
- Datum *datums;
- int ndatums;
- ArrayType *a;
- ListCell *lc;
-
- ndatums = list_length(control->requires);
- datums = (Datum *) palloc(ndatums * sizeof(Datum));
- ndatums = 0;
- foreach(lc, control->requires)
- {
- char *curreq = (char *) lfirst(lc);
-
- datums[ndatums++] =
- DirectFunctionCall1(namein, CStringGetDatum(curreq));
- }
- a = construct_array(datums, ndatums,
- NAMEOID,
- NAMEDATALEN, false, 'c');
- values[5] = PointerGetDatum(a);
- }
+ values[5] = convert_requires_to_datum(control->requires);
/* comment */
if (control->comment == NULL)
nulls[6] = true;
@@ -2002,9 +2123,75 @@ get_available_versions_for_extension(ExtensionControlFile *pcontrol,
values[6] = CStringGetTextDatum(control->comment);
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+
+ /*
+ * Find all non-directly-installable versions that would be installed
+ * starting from this version, and report them, inheriting the
+ * parameters that aren't changed in updates from this version.
+ */
+ foreach(lc2, evi_list)
+ {
+ ExtensionVersionInfo *evi2 = (ExtensionVersionInfo *) lfirst(lc2);
+ List *best_path;
+
+ if (evi2->installable)
+ continue;
+ if (find_install_path(evi_list, evi2, &best_path) == evi)
+ {
+ /*
+ * Fetch parameters for this version (pcontrol is not changed)
+ */
+ control = read_extension_aux_control_file(pcontrol, evi2->name);
+
+ /* name stays the same */
+ /* version */
+ values[1] = CStringGetTextDatum(evi2->name);
+ /* superuser */
+ values[2] = BoolGetDatum(control->superuser);
+ /* relocatable */
+ values[3] = BoolGetDatum(control->relocatable);
+ /* schema stays the same */
+ /* requires */
+ if (control->requires == NIL)
+ nulls[5] = true;
+ else
+ {
+ values[5] = convert_requires_to_datum(control->requires);
+ nulls[5] = false;
+ }
+ /* comment stays the same */
+
+ tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+ }
+ }
}
+}
- FreeDir(dir);
+/*
+ * Convert a list of extension names to a name[] Datum
+ */
+static Datum
+convert_requires_to_datum(List *requires)
+{
+ Datum *datums;
+ int ndatums;
+ ArrayType *a;
+ ListCell *lc;
+
+ ndatums = list_length(requires);
+ datums = (Datum *) palloc(ndatums * sizeof(Datum));
+ ndatums = 0;
+ foreach(lc, requires)
+ {
+ char *curreq = (char *) lfirst(lc);
+
+ datums[ndatums++] =
+ DirectFunctionCall1(namein, CStringGetDatum(curreq));
+ }
+ a = construct_array(datums, ndatums,
+ NAMEOID,
+ NAMEDATALEN, false, 'c');
+ return PointerGetDatum(a);
}
/*
@@ -2076,7 +2263,7 @@ pg_extension_update_paths(PG_FUNCTION_ARGS)
continue;
/* Find shortest path from evi1 to evi2 */
- path = find_update_path(evi_list, evi1, evi2, true);
+ path = find_update_path(evi_list, evi1, evi2, false, true);
/* Emit result row */
memset(values, 0, sizeof(values));
@@ -2808,7 +2995,8 @@ ExecAlterExtensionStmt(ParseState *pstate, AlterExtensionStmt *stmt)
* time
*/
ApplyExtensionUpdates(extensionOid, control,
- oldVersionName, updateVersions);
+ oldVersionName, updateVersions,
+ NULL, false, false);
ObjectAddressSet(address, ExtensionRelationId, extensionOid);
@@ -2827,7 +3015,10 @@ static void
ApplyExtensionUpdates(Oid extensionOid,
ExtensionControlFile *pcontrol,
const char *initialVersion,
- List *updateVersions)
+ List *updateVersions,
+ char *origSchemaName,
+ bool cascade,
+ bool is_create)
{
const char *oldVersionName = initialVersion;
ListCell *lcv;
@@ -2906,8 +3097,9 @@ ApplyExtensionUpdates(Oid extensionOid,
heap_close(extRel, RowExclusiveLock);
/*
- * Look up the prerequisite extensions for this version, and build
- * lists of their OIDs and the OIDs of their target schemas.
+ * Look up the prerequisite extensions for this version, install them
+ * if necessary, and build lists of their OIDs and the OIDs of their
+ * target schemas.
*/
requiredExtensions = NIL;
requiredSchemas = NIL;
@@ -2917,16 +3109,12 @@ ApplyExtensionUpdates(Oid extensionOid,
Oid reqext;
Oid reqschema;
- /*
- * We intentionally don't use get_extension_oid's default error
- * message here, because it would be confusing in this context.
- */
- reqext = get_extension_oid(curreq, true);
- if (!OidIsValid(reqext))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("required extension \"%s\" is not installed",
- curreq)));
+ reqext = get_required_extension(curreq,
+ control->name,
+ origSchemaName,
+ cascade,
+ NIL,
+ is_create);
reqschema = get_extension_schema(reqext);
requiredExtensions = lappend_oid(requiredExtensions, reqext);
requiredSchemas = lappend_oid(requiredSchemas, reqschema);
--
cgit v1.2.3
From c3c0d7bd701dae4737c974a59ffa9b366110f9c1 Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Sun, 11 Sep 2016 23:26:18 +0100
Subject: Raise max setting of checkpoint_timeout to 1d
Previously checkpoint_timeout was capped at 3600s.
The new maximum setting is 86400s = 24h = 1d.
Discussion: 32558.1454471895@sss.pgh.pa.us
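For example, under the new cap (a sketch; checkpoint_timeout only needs a
configuration reload, not a restart):

ALTER SYSTEM SET checkpoint_timeout = '1d';  -- was rejected before this patch
SELECT pg_reload_conf();
SHOW checkpoint_timeout;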
---
doc/src/sgml/config.sgml | 2 +-
src/backend/utils/misc/guc.c | 2 +-
src/backend/utils/misc/postgresql.conf.sample | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 7c483c6ef3..cd66abc8ba 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2614,7 +2614,7 @@ include_dir 'conf.d'
Maximum time between automatic WAL checkpoints, in seconds.
- The valid range is between 30 seconds and one hour.
+ The valid range is between 30 seconds and one day.
The default is five minutes (5min>).
Increasing this parameter can increase the amount of time needed
for crash recovery.
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c5178f7cad..c72bd6190a 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2250,7 +2250,7 @@ static struct config_int ConfigureNamesInt[] =
GUC_UNIT_S
},
&CheckPointTimeout,
- 300, 30, 3600,
+ 300, 30, 86400,
NULL, NULL, NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 6d0666c44f..b1c3aea9ee 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -203,7 +203,7 @@
# - Checkpoints -
-#checkpoint_timeout = 5min # range 30s-1h
+#checkpoint_timeout = 5min # range 30s-1d
#max_wal_size = 1GB
#min_wal_size = 80MB
#checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0
--
cgit v1.2.3
From 9083353b15c3cf8e7bbac104a81ad42281178cdf Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 12 Sep 2016 12:00:00 -0400
Subject: pg_basebackup: Clean created directories on failure
Like initdb, clean up created data and xlog directories, unless the new
-n/--noclean option is specified.
Tablespace directories are not cleaned up, but a message is written
about that.
Reviewed-by: Masahiko Sawada
---
doc/src/sgml/ref/pg_basebackup.sgml | 18 +++++
src/bin/pg_basebackup/pg_basebackup.c | 98 ++++++++++++++++++++++++++--
src/bin/pg_basebackup/t/010_pg_basebackup.pl | 10 ++-
3 files changed, 119 insertions(+), 7 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 03615da480..9f1eae12d8 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -398,6 +398,24 @@ PostgreSQL documentation
+
+
-n
+
--noclean
+
+
+ By default, when pg_basebackup aborts with an
+ error, it removes any directories it might have created before
+ discovering that it cannot finish the job (for example, data directory
+ and transaction log directory). This option inhibits tidying-up and is
+ thus useful for debugging.
+
+
+
+ Note that tablespace directories are not cleaned up either way.
+
+
+
+
-P
--progress
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 351a42068f..42f3b273a6 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -58,6 +58,7 @@ static TablespaceList tablespace_dirs = {NULL, NULL};
static char *xlog_dir = "";
static char format = 'p'; /* p(lain)/t(ar) */
static char *label = "pg_basebackup base backup";
+static bool noclean = false;
static bool showprogress = false;
static int verbose = 0;
static int compresslevel = 0;
@@ -69,6 +70,13 @@ static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
static pg_time_t last_progress_report = 0;
static int32 maxrate = 0; /* no limit by default */
+static bool success = false;
+static bool made_new_pgdata = false;
+static bool found_existing_pgdata = false;
+static bool made_new_xlogdir = false;
+static bool found_existing_xlogdir = false;
+static bool made_tablespace_dirs = false;
+static bool found_tablespace_dirs = false;
/* Progress counters */
static uint64 totalsize;
@@ -82,6 +90,7 @@ static int bgpipe[2] = {-1, -1};
/* Handle to child process */
static pid_t bgchild = -1;
+static bool in_log_streamer = false;
/* End position for xlog streaming, empty string if unknown yet */
static XLogRecPtr xlogendptr;
@@ -98,7 +107,7 @@ static PQExpBuffer recoveryconfcontents = NULL;
/* Function headers */
static void usage(void);
static void disconnect_and_exit(int code);
-static void verify_dir_is_empty_or_create(char *dirname);
+static void verify_dir_is_empty_or_create(char *dirname, bool *created, bool *found);
static void progress_report(int tablespacenum, const char *filename, bool force);
static void ReceiveTarFile(PGconn *conn, PGresult *res, int rownum);
@@ -114,6 +123,69 @@ static const char *get_tablespace_mapping(const char *dir);
static void tablespace_list_append(const char *arg);
+static void
+cleanup_directories_atexit(void)
+{
+ if (success || in_log_streamer)
+ return;
+
+ if (!noclean)
+ {
+ if (made_new_pgdata)
+ {
+ fprintf(stderr, _("%s: removing data directory \"%s\"\n"),
+ progname, basedir);
+ if (!rmtree(basedir, true))
+ fprintf(stderr, _("%s: failed to remove data directory\n"),
+ progname);
+ }
+ else if (found_existing_pgdata)
+ {
+ fprintf(stderr,
+ _("%s: removing contents of data directory \"%s\"\n"),
+ progname, basedir);
+ if (!rmtree(basedir, false))
+ fprintf(stderr, _("%s: failed to remove contents of data directory\n"),
+ progname);
+ }
+
+ if (made_new_xlogdir)
+ {
+ fprintf(stderr, _("%s: removing transaction log directory \"%s\"\n"),
+ progname, xlog_dir);
+ if (!rmtree(xlog_dir, true))
+ fprintf(stderr, _("%s: failed to remove transaction log directory\n"),
+ progname);
+ }
+ else if (found_existing_xlogdir)
+ {
+ fprintf(stderr,
+ _("%s: removing contents of transaction log directory \"%s\"\n"),
+ progname, xlog_dir);
+ if (!rmtree(xlog_dir, false))
+ fprintf(stderr, _("%s: failed to remove contents of transaction log directory\n"),
+ progname);
+ }
+ }
+ else
+ {
+ if (made_new_pgdata || found_existing_pgdata)
+ fprintf(stderr,
+ _("%s: data directory \"%s\" not removed at user's request\n"),
+ progname, basedir);
+
+ if (made_new_xlogdir || found_existing_xlogdir)
+ fprintf(stderr,
+ _("%s: transaction log directory \"%s\" not removed at user's request\n"),
+ progname, xlog_dir);
+ }
+
+ if (made_tablespace_dirs || found_tablespace_dirs)
+ fprintf(stderr,
+ _("%s: changes to tablespace directories will not be undone"),
+ progname);
+}
+
static void
disconnect_and_exit(int code)
{
@@ -253,6 +325,7 @@ usage(void)
printf(_(" -c, --checkpoint=fast|spread\n"
" set fast or spread checkpointing\n"));
printf(_(" -l, --label=LABEL set backup label\n"));
+ printf(_(" -n, --noclean do not clean up after errors\n"));
printf(_(" -P, --progress show progress information\n"));
printf(_(" -v, --verbose output verbose messages\n"));
printf(_(" -V, --version output version information, then exit\n"));
@@ -375,6 +448,8 @@ LogStreamerMain(logstreamer_param *param)
{
StreamCtl stream;
+ in_log_streamer = true;
+
MemSet(&stream, 0, sizeof(stream));
stream.startpos = param->startptr;
stream.timeline = param->timeline;
@@ -501,7 +576,7 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
* be given and the process ended.
*/
static void
-verify_dir_is_empty_or_create(char *dirname)
+verify_dir_is_empty_or_create(char *dirname, bool *created, bool *found)
{
switch (pg_check_dir(dirname))
{
@@ -517,12 +592,16 @@ verify_dir_is_empty_or_create(char *dirname)
progname, dirname, strerror(errno));
disconnect_and_exit(1);
}
+ if (created)
+ *created = true;
return;
case 1:
/*
* Exists, empty
*/
+ if (found)
+ *found = true;
return;
case 2:
case 3:
@@ -1683,7 +1762,7 @@ BaseBackup(void)
{
char *path = (char *) get_tablespace_mapping(PQgetvalue(res, i, 1));
- verify_dir_is_empty_or_create(path);
+ verify_dir_is_empty_or_create(path, &made_tablespace_dirs, &found_tablespace_dirs);
}
}
@@ -1892,6 +1971,7 @@ main(int argc, char **argv)
{"gzip", no_argument, NULL, 'z'},
{"compress", required_argument, NULL, 'Z'},
{"label", required_argument, NULL, 'l'},
+ {"noclean", no_argument, NULL, 'n'},
{"dbname", required_argument, NULL, 'd'},
{"host", required_argument, NULL, 'h'},
{"port", required_argument, NULL, 'p'},
@@ -1926,7 +2006,9 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:zZ:d:c:h:p:U:s:S:wWvP",
+ atexit(cleanup_directories_atexit);
+
+ while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:nzZ:d:c:h:p:U:s:S:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -2001,6 +2083,9 @@ main(int argc, char **argv)
case 'l':
label = pg_strdup(optarg);
break;
+ case 'n':
+ noclean = true;
+ break;
case 'z':
#ifdef HAVE_LIBZ
compresslevel = Z_DEFAULT_COMPRESSION;
@@ -2170,14 +2255,14 @@ main(int argc, char **argv)
* unless we are writing to stdout.
*/
if (format == 'p' || strcmp(basedir, "-") != 0)
- verify_dir_is_empty_or_create(basedir);
+ verify_dir_is_empty_or_create(basedir, &made_new_pgdata, &found_existing_pgdata);
/* Create transaction log symlink, if required */
if (strcmp(xlog_dir, "") != 0)
{
char *linkloc;
- verify_dir_is_empty_or_create(xlog_dir);
+ verify_dir_is_empty_or_create(xlog_dir, &made_new_xlogdir, &found_existing_xlogdir);
/* form name of the place where the symlink must go */
linkloc = psprintf("%s/pg_xlog", basedir);
@@ -2198,5 +2283,6 @@ main(int argc, char **argv)
BaseBackup();
+ success = true;
return 0;
}
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index 6c33936d25..fd9857d67b 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
-use Test::More tests => 51;
+use Test::More tests => 54;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@@ -40,6 +40,14 @@ $node->command_fails(
[ 'pg_basebackup', '-D', "$tempdir/backup" ],
'pg_basebackup fails because of WAL configuration');
+ok(! -d "$tempdir/backup", 'backup directory was cleaned up');
+
+$node->command_fails(
+ [ 'pg_basebackup', '-D', "$tempdir/backup", '-n' ],
+ 'failing run with noclean option');
+
+ok(-d "$tempdir/backup", 'backup directory was created and left behind');
+
open CONF, ">>$pgdata/postgresql.conf";
print CONF "max_replication_slots = 10\n";
print CONF "max_wal_senders = 10\n";
--
cgit v1.2.3
From 42fd984c0b7b53d1bc961c9ed6bb84fe28eae52b Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 12 Sep 2016 19:19:24 -0400
Subject: Docs: assorted minor cleanups.
Standardize on "user_name" for a field name in related examples in
ddl.sgml; before we had variously "user_name", "username", and "user".
The last is flat wrong because it conflicts with a reserved word.
Be consistent about entry capitalization in a table in func.sgml.
Fix a typo in pgtrgm.sgml.
Back-patch to 9.6 and 9.5 as relevant.
Alexander Law
---
doc/src/sgml/ddl.sgml | 44 ++++++++++++++++++++++----------------------
doc/src/sgml/func.sgml | 2 +-
doc/src/sgml/pgtrgm.sgml | 2 +-
3 files changed, 24 insertions(+), 24 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index a393813b38..f43352c2a9 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -1629,7 +1629,7 @@ CREATE POLICY account_managers ON accounts TO managers
CREATE POLICY user_policy ON users
- USING (user = current_user);
+ USING (user_name = current_user);
@@ -1642,7 +1642,7 @@ CREATE POLICY user_policy ON users
CREATE POLICY user_policy ON users
USING (true)
- WITH CHECK (user = current_user);
+ WITH CHECK (user_name = current_user);
@@ -1662,7 +1662,7 @@ CREATE POLICY user_policy ON users
-- Simple passwd-file based example
CREATE TABLE passwd (
- username text UNIQUE NOT NULL,
+ user_name text UNIQUE NOT NULL,
pwhash text,
uid int PRIMARY KEY,
gid int NOT NULL,
@@ -1696,9 +1696,9 @@ CREATE POLICY all_view ON passwd FOR SELECT USING (true);
-- Normal users can update their own records, but
-- limit which shells a normal user is allowed to set
CREATE POLICY user_mod ON passwd FOR UPDATE
- USING (current_user = username)
+ USING (current_user = user_name)
WITH CHECK (
- current_user = username AND
+ current_user = user_name AND
shell IN ('/bin/bash','/bin/sh','/bin/dash','/bin/zsh','/bin/tcsh')
);
@@ -1706,7 +1706,7 @@ CREATE POLICY user_mod ON passwd FOR UPDATE
GRANT SELECT, INSERT, UPDATE, DELETE ON passwd TO admin;
-- Users only get select access on public columns
GRANT SELECT
- (username, uid, gid, real_name, home_phone, extra_info, home_dir, shell)
+ (user_name, uid, gid, real_name, home_phone, extra_info, home_dir, shell)
ON passwd TO public;
-- Allow users to update certain columns
GRANT UPDATE
@@ -1725,11 +1725,11 @@ GRANT UPDATE
postgres=> set role admin;
SET
postgres=> table passwd;
- username | pwhash | uid | gid | real_name | home_phone | extra_info | home_dir | shell
-----------+--------+-----+-----+-----------+--------------+------------+-------------+-----------
- admin | xxx | 0 | 0 | Admin | 111-222-3333 | | /root | /bin/dash
- bob | xxx | 1 | 1 | Bob | 123-456-7890 | | /home/bob | /bin/zsh
- alice | xxx | 2 | 1 | Alice | 098-765-4321 | | /home/alice | /bin/zsh
+ user_name | pwhash | uid | gid | real_name | home_phone | extra_info | home_dir | shell
+-----------+--------+-----+-----+-----------+--------------+------------+-------------+-----------
+ admin | xxx | 0 | 0 | Admin | 111-222-3333 | | /root | /bin/dash
+ bob | xxx | 1 | 1 | Bob | 123-456-7890 | | /home/bob | /bin/zsh
+ alice | xxx | 2 | 1 | Alice | 098-765-4321 | | /home/alice | /bin/zsh
(3 rows)
-- Test what Alice is able to do
@@ -1737,26 +1737,26 @@ postgres=> set role alice;
SET
postgres=> table passwd;
ERROR: permission denied for relation passwd
-postgres=> select username,real_name,home_phone,extra_info,home_dir,shell from passwd;
- username | real_name | home_phone | extra_info | home_dir | shell
-----------+-----------+--------------+------------+-------------+-----------
- admin | Admin | 111-222-3333 | | /root | /bin/dash
- bob | Bob | 123-456-7890 | | /home/bob | /bin/zsh
- alice | Alice | 098-765-4321 | | /home/alice | /bin/zsh
+postgres=> select user_name,real_name,home_phone,extra_info,home_dir,shell from passwd;
+ user_name | real_name | home_phone | extra_info | home_dir | shell
+-----------+-----------+--------------+------------+-------------+-----------
+ admin | Admin | 111-222-3333 | | /root | /bin/dash
+ bob | Bob | 123-456-7890 | | /home/bob | /bin/zsh
+ alice | Alice | 098-765-4321 | | /home/alice | /bin/zsh
(3 rows)
-postgres=> update passwd set username = 'joe';
+postgres=> update passwd set user_name = 'joe';
ERROR: permission denied for relation passwd
-- Alice is allowed to change her own real_name, but no others
postgres=> update passwd set real_name = 'Alice Doe';
UPDATE 1
-postgres=> update passwd set real_name = 'John Doe' where username = 'admin';
+postgres=> update passwd set real_name = 'John Doe' where user_name = 'admin';
UPDATE 0
postgres=> update passwd set shell = '/bin/xx';
ERROR: new row violates WITH CHECK OPTION for "passwd"
postgres=> delete from passwd;
ERROR: permission denied for relation passwd
-postgres=> insert into passwd (username) values ('xxx');
+postgres=> insert into passwd (user_name) values ('xxx');
ERROR: permission denied for relation passwd
-- Alice can change her own password; RLS silently prevents updating other rows
postgres=> update passwd set pwhash = 'abc';
@@ -2055,7 +2055,7 @@ DROP SCHEMA myschema CASCADE;
(since this is one of the ways to restrict the activities of your
users to well-defined namespaces). The syntax for that is:
-CREATE SCHEMA schemaname AUTHORIZATION username;
+CREATE SCHEMA schema_name AUTHORIZATION user_name;
You can even omit the schema name, in which case the schema name
will be the same as the user name. See username>.tablename>.
+ user_name>.table_name>.
This is how PostgreSQL will effectively
behave if you create a per-user schema for every user.
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 5148095fb3..47fcb30da0 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9523,7 +9523,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
ts_filter(vectortsvector>, weights"char"[]>)tsvector
- Select only elements with given weights from vector
+ select only elements with given weights from vectorts_filter('fat:2,4 cat:3b rat:5A'::tsvector, '{a,b}')'cat':3B 'rat':5A
diff --git a/doc/src/sgml/pgtrgm.sgml b/doc/src/sgml/pgtrgm.sgml
index d362b03cf3..775a7b8be7 100644
--- a/doc/src/sgml/pgtrgm.sgml
+++ b/doc/src/sgml/pgtrgm.sgml
@@ -104,7 +104,7 @@
the second string a most similar word not a most similar substring. The
range of the result is zero (indicating that the two strings are
completely dissimilar) to one (indicating that the first string is
- identical to one of the word of the second string).
+ identical to one of the words of the second string).
--
cgit v1.2.3
From 5225c66336a1e4b46925e9f169086fc70f49736f Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Thu, 15 Sep 2016 17:24:54 -0400
Subject: Clarify policy on marking inherited constraints as valid.
Amit Langote and Robert Haas
---
doc/src/sgml/ref/alter_table.sgml | 14 +++++++++-----
src/backend/commands/tablecmds.c | 3 ++-
2 files changed, 11 insertions(+), 6 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 6f51cbc896..e48ccf21e4 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1028,11 +1028,15 @@ ALTER TABLE ALL IN TABLESPACE name
If a table has any descendant tables, it is not permitted to add,
- rename, or change the type of a column, or rename an inherited constraint
- in the parent table without doing
- the same to the descendants. That is, ALTER TABLE ONLY
- will be rejected. This ensures that the descendants always have
- columns matching the parent.
+ rename, or change the type of a column in the parent table without doing
+ the same to the descendants. This ensures that the descendants always have
+ columns matching the parent. Similarly, a constraint cannot be renamed
+ in the parent without also renaming it in all descendants, so that
+ constraints also match between the parent and its descendants.
+ Also, because selecting from the parent also selects from its descendants,
+ a constraint on the parent cannot be marked valid unless it is also marked
+ valid for those descendants. In all of these cases, ALTER TABLE
+ ONLY will be rejected.
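For illustration, a minimal SQL sketch of the rule above (table and constraint names are hypothetical):

CREATE TABLE parent (a int);
CREATE TABLE child () INHERITS (parent);
ALTER TABLE parent ADD CONSTRAINT a_positive CHECK (a > 0) NOT VALID;

-- rejected: would mark the parent's constraint valid without validating
-- the copy on the child
ALTER TABLE ONLY parent VALIDATE CONSTRAINT a_positive;

-- accepted: recurses, validating the constraint on all descendants too
ALTER TABLE parent VALIDATE CONSTRAINT a_positive;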
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 86e98148c1..d31276284c 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -6908,7 +6908,8 @@ ATExecValidateConstraint(Relation rel, char *constrName, bool recurse,
/*
* If we are told not to recurse, there had better not be any
- * child tables; else the addition would put them out of step.
+ * child tables, because we can't mark the constraint on the
+ * parent valid unless it is valid for all child tables.
*/
if (!recurse)
ereport(ERROR,
--
cgit v1.2.3
From 2c8f0d6e53e5dbcf28ee127303b81a6e12942665 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 19 Sep 2016 13:38:21 -0400
Subject: Update recovery_min_apply_delay docs for remote_apply mode.
Bernd Helmle, reviewed by Thomas Munro, tweaked by me.
---
doc/src/sgml/recovery-config.sgml | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index de3fb10f5b..8c24ae2174 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -487,10 +487,17 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
This parameter is intended for use with streaming replication deployments;
however, if the parameter is specified it will be honored in all cases.
- Synchronous replication is not affected by this setting because there is
- not yet any setting to request synchronous apply of transaction commits.
+
hot_standby_feedback> will be delayed by use of this feature
which could lead to bloat on the master; use both together with care.
+
+
+
+ Synchronous replication is affected by this setting when synchronous_commit
+ is set to remote_apply; every COMMIT
+ will need to wait to be applied.
+
+
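A hedged sketch of the interaction described above (assumes a synchronous standby configured with recovery_min_apply_delay; the accounts table is hypothetical):

BEGIN;
SET LOCAL synchronous_commit = remote_apply;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
-- the COMMIT waits until the standby has applied the WAL, so it is
-- held up by the standby's configured apply delay
COMMIT;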
--
cgit v1.2.3
From 6cc54f38a9fe1f4103c45a9858804d1d5d4de0fd Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 19 Sep 2016 21:56:16 +0300
Subject: Remove obsolete warning from docs.
Python 2.4 and Fedora 4 are both obsolete at this point, especially
unpatched debug builds.
Discussion: <85e377b2-d459-396e-59b1-115548bbc059@iki.fi>
---
doc/src/sgml/plpython.sgml | 13 -------------
1 file changed, 13 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/plpython.sgml b/doc/src/sgml/plpython.sgml
index 905e757ab6..bb69c752b8 100644
--- a/doc/src/sgml/plpython.sgml
+++ b/doc/src/sgml/plpython.sgml
@@ -696,19 +696,6 @@ AS $$
$$ LANGUAGE plpythonu;
-
-
- Due to Python
- bug #1483133,
- some debug versions of Python 2.4
- (configured and compiled with option --with-pydebug)
- are known to crash the PostgreSQL server
- when using an iterator to return a set result.
- Unpatched versions of Fedora 4 contain this bug.
- It does not happen in production versions of Python or on patched
- versions of Fedora 4.
-
-
--
cgit v1.2.3
From 4f6494cfd26c1dfe708c4598c11eea5fce168fd1 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 20 Sep 2016 12:00:00 -0400
Subject: doc: Correct ALTER USER MAPPING example
The existing example threw an error.
From: gabrielle
---
doc/src/sgml/ref/alter_user_mapping.sgml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_user_mapping.sgml b/doc/src/sgml/ref/alter_user_mapping.sgml
index 3a908130d8..3be54afee5 100644
--- a/doc/src/sgml/ref/alter_user_mapping.sgml
+++ b/doc/src/sgml/ref/alter_user_mapping.sgml
@@ -89,9 +89,9 @@ ALTER USER MAPPING FOR { user_nameExamples
- Change the password for user mapping bob>, server foo>:
+ Change the password for user mapping bob>, server foo>:
-ALTER USER MAPPING FOR bob SERVER foo OPTIONS (user 'bob', password 'public');
+ALTER USER MAPPING FOR bob SERVER foo OPTIONS (SET password 'public');
--
cgit v1.2.3
From 16d1adb35cf887325b7c5dbf473632d557065171 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 20 Sep 2016 12:00:00 -0400
Subject: doc: Fix documentation to match actual make output
based on patch from Takeshi Ideriha
---
doc/src/sgml/installation.sgml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 14a6d57aea..f6de18ed2d 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1510,7 +1510,7 @@ su - postgres
will take a few minutes depending on your
hardware. The last line displayed should be:
-All of PostgreSQL is successfully made. Ready to install.
+All of PostgreSQL successfully made. Ready to install.
@@ -1523,7 +1523,7 @@ All of PostgreSQL is successfully made. Ready to install.
The last line displayed should be:
-PostgreSQL, contrib and HTML documentation successfully made. Ready to install.
+PostgreSQL, contrib, and documentation successfully made. Ready to install.
--
cgit v1.2.3
From 46b55e7f853dc0ef60ae3b1042b883fa4ffac95f Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 20 Sep 2016 12:00:00 -0400
Subject: pg_restore: Add -N option to exclude schemas
This is similar to the -N option in pg_dump, except that it doesn't take
a pattern, just like the existing -n option in pg_restore.
From: Michael Banck
---
doc/src/sgml/ref/pg_restore.sgml | 18 +++++++++++++++++-
src/bin/pg_dump/pg_backup.h | 1 +
src/bin/pg_dump/pg_backup_archiver.c | 5 +++++
src/bin/pg_dump/pg_restore.c | 8 +++++++-
4 files changed, 30 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index c9069193af..bd5b405314 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -302,7 +302,7 @@
-
-n namespace
+
-n schema
--schema=schema
@@ -314,6 +314,22 @@
+
+
-N schema
+
--exclude-schema=schema
+
+
+ Do not restore objects that are in the named schema. Multiple schemas
+ to be excluded may be specified with multiple
-N> switches.
+
+
+
+ When both
-n> and
-N> are given for the same
+ schema name, the
-N> switch wins and the schema is excluded.
+
+
+
+
-O
--no-owner
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 4afa92f5f6..0a28124cf6 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -99,6 +99,7 @@ typedef struct _restoreOptions
SimpleStringList indexNames;
SimpleStringList functionNames;
SimpleStringList schemaNames;
+ SimpleStringList schemaExcludeNames;
SimpleStringList triggerNames;
SimpleStringList tableNames;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 05bdbdbf02..a69b06f6d7 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -2751,6 +2751,11 @@ _tocEntryRequired(TocEntry *te, teSection curSection, RestoreOptions *ropt)
return 0;
}
+ if (ropt->schemaExcludeNames.head != NULL
+ && te->namespace
+ && simple_string_list_member(&ropt->schemaExcludeNames, te->namespace))
+ return 0;
+
if (ropt->selTypes)
{
if (strcmp(te->desc, "TABLE") == 0 ||
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index fb08e6bb8e..b21fd263b0 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -85,6 +85,7 @@ main(int argc, char **argv)
{"data-only", 0, NULL, 'a'},
{"dbname", 1, NULL, 'd'},
{"exit-on-error", 0, NULL, 'e'},
+ {"exclude-schema", 1, NULL, 'N'},
{"file", 1, NULL, 'f'},
{"format", 1, NULL, 'F'},
{"function", 1, NULL, 'P'},
@@ -148,7 +149,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "acCd:ef:F:h:I:j:lL:n:Op:P:RsS:t:T:U:vwWx1",
+ while ((c = getopt_long(argc, argv, "acCd:ef:F:h:I:j:lL:n:N:Op:P:RsS:t:T:U:vwWx1",
cmdopts, NULL)) != -1)
{
switch (c)
@@ -196,6 +197,10 @@ main(int argc, char **argv)
simple_string_list_append(&opts->schemaNames, optarg);
break;
+ case 'N': /* do not restore objects in this schema */
+ simple_string_list_append(&opts->schemaExcludeNames, optarg);
+ break;
+
case 'O':
opts->noOwner = 1;
break;
@@ -456,6 +461,7 @@ usage(const char *progname)
printf(_(" -L, --use-list=FILENAME use table of contents from this file for\n"
" selecting/ordering output\n"));
printf(_(" -n, --schema=NAME restore only objects in this schema\n"));
+ printf(_(" -N, --exclude-schema=NAME do not restore objects in this schema\n"));
printf(_(" -O, --no-owner skip restoration of object ownership\n"));
printf(_(" -P, --function=NAME(args) restore named function\n"));
printf(_(" -s, --schema-only restore only the schema, no data\n"));
--
cgit v1.2.3
From 60270e5e00850ee8cc34296e38d0000415c8b152 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Wed, 21 Sep 2016 08:37:02 -0400
Subject: Add more parallel query documentation.
Previously, the individual settings were documented, but there was
no overall discussion of the capabilities and limitations of the
feature. Add that.
Patch by me, reviewed by Peter Eisentraut and Álvaro Herrera.
---
doc/src/sgml/config.sgml | 5 +
doc/src/sgml/filelist.sgml | 1 +
doc/src/sgml/parallel.sgml | 472 +++++++++++++++++++++++++++++++++++++++++++++
doc/src/sgml/postgres.sgml | 1 +
4 files changed, 479 insertions(+)
create mode 100644 doc/src/sgml/parallel.sgml
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index cd66abc8ba..a848a7edd1 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2027,6 +2027,11 @@ include_dir 'conf.d'
as much CPU time, memory, I/O bandwidth, and so forth as a query which
uses no workers at all.
+
+
+ For more information on parallel query, see
+ .
+
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 43837114ba..69649a7da4 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -24,6 +24,7 @@
+
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
new file mode 100644
index 0000000000..c80d42dbef
--- /dev/null
+++ b/doc/src/sgml/parallel.sgml
@@ -0,0 +1,472 @@
+
+
+
+ Parallel Query
+
+
+ parallel query
+
+
+
+ PostgreSQL> can devise query plans which can leverage
+ multiple CPUs in order to answer queries faster. This feature is known
+ as parallel query. Many queries cannot benefit from parallel query, either
+ due to limitations of the current implementation or because there is no
+ imaginable query plan which is any faster than the serial query plan.
+ However, for queries that can benefit, the speedup from parallel query
+ is often very significant. Many queries can run more than twice as fast
+ when using parallel query, and some queries can run four times faster or
+ even more. Queries that touch a large amount of data but return only a
+ few rows to the user will typically benefit most. This chapter explains
+ some details of how parallel query works and in which situations it can be
+ used so that users who wish to make use of it can understand what to expect.
+
+
+
+ How Parallel Query Works
+
+
+ When the optimizer determines that parallel query is the fastest execution
+ strategy for a particular query, it will create a query plan which includes
+ a Gather node. Here is a simple example:
+
+
+EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Gather (cost=1000.00..217018.43 rows=1 width=97)
+ Workers Planned: 2
+ -> Parallel Seq Scan on pgbench_accounts (cost=0.00..216018.33 rows=1 width=97)
+ Filter: (filler ~~ '%x%'::text)
+(4 rows)
+
+
+
+
+ In all cases, the Gather node will have exactly one
+ child plan, which is the portion of the plan that will be executed in
+ parallel. If the Gather> node is at the very top of the plan
+ tree, then the entire query will execute in parallel. If it is somewhere
+ else in the plan tree, then only that portion of the query will run in
+ parallel. In the example above, the query accesses only one table, so
+ there is only one plan node other than the Gather> node itself;
+ since that plan node is a child of the Gather> node, it will
+ run in parallel.
+
+
+
+ Using EXPLAIN>, you can see the number of
+ workers chosen by the planner. When the Gather> node is reached
+ during query execution, the process which is implementing the user's
+ session will request a number of background
+ worker processes equal to the number
+ of workers chosen by the planner. The total number of background
+ workers that can exist at any one time is limited by
+ , so it is possible for a
+ parallel query to run with fewer workers than planned, or even with
+ no workers at all. The optimal plan may depend on the number of workers
+ that are available, so this can result in poor query performance. If this
+ occurrence is frequent, consider increasing
+ max_worker_processes> so that more workers can be run
+ simultaneously, or alternatively reducing
+ so that the planner
+ requests fewer workers.
+
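EXPLAIN (ANALYZE) reports both numbers, so a worker shortfall is easy to spot; an abridged, illustrative output sketch:

EXPLAIN (ANALYZE) SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-- Gather (actual rows=0 loops=1)
--   Workers Planned: 2
--   Workers Launched: 0    <- no background workers were available
--   ->  Parallel Seq Scan on pgbench_accounts ...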
+
+
+ Every background worker process which is successfully started for a given
+ parallel query will execute the portion of the plan which is a descendant
+ of the Gather> node. The leader will also execute that portion
+ of the plan, but it has an additional responsibility: it must also read
+ all of the tuples generated by the workers. When the parallel portion of
+ the plan generates only a small number of tuples, the leader will often
+ behave very much like an additional worker, speeding up query execution.
+ Conversely, when the parallel portion of the plan generates a large number
+ of tuples, the leader may be almost entirely occupied with reading the
+ tuples generated by the workers and performing any further processing
+ steps which are required by plan nodes above the level of the
+ Gather node. In such cases, the leader will do very
+ little of the work of executing the parallel portion of the plan.
+
+
+
+
+ When Can Parallel Query Be Used?
+
+
+ There are several settings which can cause the query planner not to
+ generate a parallel query plan under any circumstances. In order for
+ any parallel query plans whatsoever to be generated, the following
+ settings must be configured as indicated.
+
+
+
+
+
+ must be set to a
+ value which is greater than zero. This is a special case of the more
+ general principle that no more workers should be used than the number
+ configured via max_parallel_workers_per_gather.
+
+
+
+
+
+ must be set to a
+ value other than none>. Parallel query requires dynamic
+ shared memory in order to pass data between cooperating processes.
+
+
+
+
+
+ In addition, the system must not be running in single-user mode. Since
+ the entire database system is running as a single process in this situation,
+ no background workers will be available.
+
+
+
+ Even when it is in general possible for parallel query plans to be
+ generated, the planner will not generate them for a given query
+ if any of the following are true:
+
+
+
+
+
+ The query writes any data or locks any database rows. If a query
+ contains a data-modifying operation either at the top level or within
+ a CTE, no parallel plans for that query will be generated. This is a
+ limitation of the current implementation which could be lifted in a
+ future release.
+
+
+
+
+
+ The query might be suspended during execution. In any situation in
+ which the system thinks that partial or incremental execution might
+ occur, no parallel plan is generated. For example, a cursor created
+ using DECLARE CURSOR will never use
+ a parallel plan. Similarly, a PL/pgSQL loop of the form
+ FOR x IN query LOOP .. END LOOP will never use a
+ parallel plan, because the parallel query system is unable to verify
+ that the code in the loop is safe to execute while parallel query is
+ active.
+
+
+
+
+
+ The query uses any function marked PARALLEL UNSAFE.
+ Most system-defined functions are PARALLEL SAFE,
+ but user-defined functions are marked PARALLEL
+ UNSAFE by default. See the discussion of
+ .
+
+
+
+
+
+ The query is running inside of another query that is already parallel.
+ For example, if a function called by a parallel query issues an SQL
+ query itself, that query will never use a parallel plan. This is a
+ limitation of the current implementation, but it may not be desirable
+ to remove this limitation, since it could result in a single query
+ using a very large number of processes.
+
+
+
+
+
+ The transaction isolation level is serializable. This is
+ a limitation of the current implementation.
+
+
+
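For example, a session-level sketch of the first prerequisite (dynamic_shared_memory_type lives in postgresql.conf and requires a server restart to change):

-- allow up to two workers per Gather in this session;
-- the 9.6 default is 0, i.e. parallel query disabled
SET max_parallel_workers_per_gather = 2;
-- must report anything other than 'none'
SHOW dynamic_shared_memory_type;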
+
+
+ Even when a parallel query plan is generated for a particular query, there
+ are several circumstances under which it will be impossible to execute
+ that plan in parallel at execution time. If this occurs, the leader
+ will execute the portion of the plan below the Gather>
+ node entirely by itself, almost as if the Gather> node were
+ not present. This will happen if any of the following conditions are met:
+
+
+
+
+
+ No background workers can be obtained because of the limitation that
+ the total number of background workers cannot exceed
+ .
+
+
+
+
+
+ The client sends an Execute message with a non-zero fetch count.
+ See the discussion of the
+ extended query protocol.
+ Since libpq currently provides no way to
+ send such a message, this can only occur when using a client that
+ does not rely on libpq. If this is a frequent
+ occurrence, it may be a good idea to set
+ in sessions
+ where it is likely, so as to avoid generating query plans that may
+ be suboptimal when run serially.
+
+
+
+
+
+ The transaction isolation level is serializable. This situation
+ does not normally arise, because parallel query plans are not
+ generated when the transaction isolation level is serializable.
+ However, it can happen if the transaction isolation level is changed to
+ serializable after the plan is generated and before it is executed.
+
+
+
+
+
+
+ Parallel Plans
+
+
+ Because each worker executes the parallel portion of the plan to
+ completion, it is not possible to simply take an ordinary query plan
+ and run it using multiple workers. Each worker would produce a full
+ copy of the output result set, so the query would not run any faster
+ than normal but would produce incorrect results. Instead, the parallel
+ portion of the plan must be what is known internally to the query
+ optimizer as a partial plan>; that is, it must be constructed
+ so that each process which executes the plan will generate only a
+ subset of the output rows in such a way that each required output row
+ is guaranteed to be generated by exactly one of the cooperating processes.
+
+
+
+ Parallel Scans
+
+
+ Currently, the only type of scan which has been modified to work with
+ parallel query is a sequential scan. Therefore, the driving table in
+ a parallel plan will always be scanned using a
+ Parallel Seq Scan>. The relation's blocks will be divided
+ among the cooperating processes. Blocks are handed out one at a
+ time, so that access to the relation remains sequential. Each process
+ will visit every tuple on the page assigned to it before requesting a new
+ page.
+
+
+
+
+ Parallel Joins
+
+
+ The driving table may be joined to one or more other tables using nested
+ loops or hash joins. The inner side of the join may be any kind of
+ non-parallel plan that is otherwise supported by the planner provided that
+ it is safe to run within a parallel worker. For example, it may be an
+ index scan which looks up a value based on a column taken from the outer
+ table. Each worker will execute the inner side of the join in full, which
+ is why merge joins are not supported here. The inner side of a merge join
+ will often involve sorting the entire inner table; even if it involves an
+ index, it is unlikely to be productive to have multiple processes each
+ conduct a full index scan of the inner table.
+
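An illustrative plan shape under these rules (tables big and small are hypothetical; each worker scans small in full to build its own hash table):

EXPLAIN SELECT * FROM big JOIN small USING (id);
--  Gather
--    Workers Planned: 2
--    ->  Hash Join
--          Hash Cond: (big.id = small.id)
--          ->  Parallel Seq Scan on big
--          ->  Hash
--                ->  Seq Scan on small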
+
+
+
+ Parallel Aggregation
+
+ It is not possible to perform the aggregation portion of a query entirely
+ in parallel. For example, if a query involves selecting
+ COUNT(*)>, each worker could compute a total, but those totals
+ would need to combined in order to produce a final answer. If the query
+ involved a GROUP BY> clause, a separate total would need to
+ be computed for each group. Even though aggregation can't be done entirely
+ in parallel, queries involving aggregation are often excellent candidates
+ for parallel query, because they typically read many rows but return only
+ a few rows to the client. Queries that return many rows to the client
+ are often limited by the speed at which the client can read the data,
+ in which case parallel query cannot help very much.
+
+
+
+ PostgreSQL> supports parallel aggregation by aggregating
+ twice. First, each process participating in the parallel portion of the
+ query performs an aggregation step, producing a partial result for each
+ group of which that process is aware. This is reflected in the plan as
+ a PartialAggregate> node. Second, the partial results are
+ transferred to the leader via the Gather> node. Finally, the
+ leader re-aggregates the results across all workers in order to produce
+ the final result. This is reflected in the plan as a
+ FinalizeAggregate> node.
+
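A hedged sketch of the resulting plan shape (EXPLAIN prints the nodes as Finalize Aggregate and Partial Aggregate; costs omitted):

EXPLAIN SELECT count(*) FROM pgbench_accounts;
--  Finalize Aggregate
--    ->  Gather
--          Workers Planned: 2
--          ->  Partial Aggregate
--                ->  Parallel Seq Scan on pgbench_accounts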
+
+
+ Parallel aggregation is not supported in all situations. Each aggregate
+ must be safe> for parallelism and must
+ have a combine function. If the aggregate has a transition state of type
+ internal>, it must have serialization and deserialization
+ functions. See for more details.
+ Parallel aggregation is not supported for ordered set aggregates or when
+ the query involves GROUPING SETS>. It can only be used when
+ all joins involved in the query are also part of the parallel portion
+ of the plan.
+
+
+
+
+
+ Parallel Plan Tips
+
+
+ If a query that is expected to do so does not produce a parallel plan,
+ you can try reducing or
+ . Of course, this plan may turn
+ out to be slower than the serial plan which the planner preferred, but
+ this will not always be the case. If you don't get a parallel
+ plan even with very small values of these settings (e.g. after setting
+ them both to zero), there may be some reason why the query planner is
+ unable to generate a parallel plan for your query. See
+ and
+ for information on why this may be
+ the case.
+
+
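For instance, a session-level sketch of that experiment:

-- make parallelism look free so the planner will consider it
SET parallel_setup_cost = 0;
SET parallel_tuple_cost = 0;
EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-- if no Gather node appears even now, one of the restrictions in the
-- two sections above is probably in play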
+
+ When executing a parallel plan, you can use EXPLAIN (ANALYZE,
+ VERBOSE) to display per-worker statistics for each plan node.
+ This may be useful in determining whether the work is being evenly
+ distributed between all plan nodes and more generally in understanding the
+ performance characteristics of the plan.
+
+
+
+
+
+
+ Parallel Safety
+
+
+ The planner classifies operations involved in a query as either
+ parallel safe>, parallel restricted>,
+ or parallel unsafe>. A parallel safe operation is one which
+ does not conflict with the use of parallel query. A parallel restricted
+ operation is one which cannot be performed in a parallel worker, but which
+ can be performed in the leader while parallel query is in use. Therefore,
+ parallel restricted operations can never occur below a Gather>
+ node, but can occur elsewhere in a plan which contains a
+ Gather> node. A parallel unsafe operation is one which cannot
+ be performed while parallel query is in use, not even in the leader.
+ When a query contains anything which is parallel unsafe, parallel query
+ is completely disabled for that query.
+
+
+
+ The following operations are always parallel restricted.
+
+
+
+
+
+ Scans of common table expressions (CTEs).
+
+
+
+
+
+ Scans of temporary tables.
+
+
+
+
+
+ Scans of foreign tables, unless the foreign data wrapper has
+ an IsForeignScanParallelSafe> API which indicates otherwise.
+
+
+
+
+
+ Access to an InitPlan> or SubPlan>.
+
+
+
+
+
+ Parallel Labeling for Functions and Aggregates
+
+
+ The planner cannot automatically determine whether a user-defined
+ function or aggregate is parallel safe, parallel restricted, or parallel
+ unsafe, because this would require predicting every operation which the
+ function could possibly perform. In general, this is equivalent to the
+ Halting Problem and therefore impossible. Even for simple functions
+ where it could conceivably be done, we do not try, since this would be expensive
+ and error-prone. Instead, all user-defined functions are assumed to
+ be parallel unsafe unless otherwise marked. When using
+ or
+ , markings can be set by specifying
+ PARALLEL SAFE>, PARALLEL RESTRICTED>, or
+ PARALLEL UNSAFE> as appropriate. When using
+ , the
+ PARALLEL> option can be specified with SAFE>,
+ RESTRICTED>, or UNSAFE> as the corresponding value.
+
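A minimal sketch of both spellings (the function add_one is hypothetical):

-- label a function at creation time
CREATE FUNCTION add_one(i int) RETURNS int
    LANGUAGE sql PARALLEL SAFE
    AS 'SELECT i + 1';

-- or relabel an existing one
ALTER FUNCTION add_one(int) PARALLEL RESTRICTED;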
+
+
+ Functions and aggregates must be marked PARALLEL UNSAFE> if
+ they write to the database, access sequences, change the transaction state
+ even temporarily (e.g. a PL/pgSQL function which establishes an
+ EXCEPTION> block to catch errors), or make persistent changes to
+ settings. Similarly, functions must be marked PARALLEL
+ RESTRICTED> if they access temporary tables, client connection state,
+ cursors, prepared statements, or miscellaneous backend-local state which
+ the system cannot synchronize across workers. For example,
+ setseed> and random> are parallel restricted for
+ this last reason.
+
+
+
+ In general, if a function is labeled as being safe when it is restricted or
+ unsafe, or if it is labeled as being restricted when it is in fact unsafe,
+ it may throw errors or produce wrong answers when used in a parallel query.
+ C-language functions could in theory exhibit totally undefined behavior if
+ mislabeled, since there is no way for the system to protect itself against
+ arbitrary C code, but in most likely cases the result will be no worse than
+ for any other function. If in doubt, it is probably best to label functions
+ as UNSAFE>.
+
+
+
+ If a function executed within a parallel worker acquires locks which are
+ not held by the leader, for example by querying a table not referenced in
+ the query, those locks will be released at worker exit, not end of
+ transaction. If you write a function which does this, and this behavior
+ difference is important to you, mark such functions as
+ PARALLEL RESTRICTED
+ to ensure that they execute only in the leader.
+
+
+
+ Note that the query planner does not consider deferring the evaluation of
+ parallel-restricted functions or aggregates involved in the query in
+ order to obtain a superior plan. So, for example, if a WHERE>
+ clause applied to a particular table is parallel restricted, the query
+ planner will not consider placing the scan of that table below a
+ Gather> node. In some cases, it would be
+ possible (and perhaps even efficient) to include the scan of that table in
+ the parallel portion of the query and defer the evaluation of the
+ WHERE> clause so that it happens above the Gather>
+ node. However, the planner does not do this.
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 0346d367e5..9143917c49 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -106,6 +106,7 @@
&textsearch;
&mvcc;
&perform;
+ ∥
--
cgit v1.2.3
From e7010ce4794a4c12a6a8bfb0ca1de49b61046847 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 5 Aug 2016 21:35:19 -0400
Subject: pg_ctl: Add wait option to promote action
When waiting is selected for the promote action, look into pg_control
until the state changes, then use the PQping-based waiting until the
server is reachable.
Reviewed-by: Michael Paquier
---
doc/src/sgml/ref/pg_ctl-ref.sgml | 29 ++++++++++++++++++++------
src/bin/pg_ctl/pg_ctl.c | 45 ++++++++++++++++++++++++++++------------
src/bin/pg_ctl/t/003_promote.pl | 18 +++++++++++++++-
3 files changed, 72 insertions(+), 20 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index 6ceb7816dc..a00c355f4a 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -91,6 +91,8 @@ PostgreSQL documentation
pg_ctl
- The maximum number of seconds to wait when waiting for startup or
- shutdown to complete. Defaults to the value of the
+ The maximum number of seconds to wait when waiting for an operation
+ to complete (see option
-w
). Defaults to the value of the
PGCTLTIMEOUT> environment variable or, if not set, to 60
seconds.
@@ -383,8 +385,23 @@ PostgreSQL documentation
-w
- Wait for the startup or shutdown to complete.
- Waiting is the default option for shutdowns, but not startups.
+ Wait for an operation to complete. This is supported for the
+ modes start, stop,
+ restart, promote,
+ and register.
+
+
+
+ Waiting is the default option for shutdowns, but not startups,
+ restarts, or promotions. This is mainly for historical reasons; the
+ waiting option is almost always preferable. If waiting is not
+ selected, the requested action is triggered, but there is no feedback
+ about its success. In that case, the server log file or an external
+ monitoring system would have to be used to check the progress and
+ success of the operation.
+
+
+
When waiting for startup, pg_ctl repeatedly
attempts to connect to the server.
When waiting for shutdown, pg_ctl waits for
@@ -400,8 +417,8 @@ PostgreSQL documentation
-W
- Do not wait for startup or shutdown to complete. This is the
- default for start and restart modes.
+ Do not wait for an operation to complete. This is the opposite of the
+ option
-w
.
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index eb8a67a903..2f0976a9cc 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -1228,7 +1228,34 @@ do_promote(void)
exit(1);
}
- print_msg(_("server promoting\n"));
+ if (do_wait)
+ {
+ DBState state = DB_STARTUP;
+
+ print_msg(_("waiting for server to promote..."));
+ while (wait_seconds > 0)
+ {
+ state = get_control_dbstate();
+ if (state == DB_IN_PRODUCTION)
+ break;
+
+ print_msg(".");
+ pg_usleep(1000000); /* 1 sec */
+ wait_seconds--;
+ }
+ if (state == DB_IN_PRODUCTION)
+ {
+ print_msg(_(" done\n"));
+ print_msg(_("server promoted\n"));
+ }
+ else
+ {
+ print_msg(_(" stopped waiting\n"));
+ print_msg(_("server is still promoting\n"));
+ }
+ }
+ else
+ print_msg(_("server promoting\n"));
}
@@ -2405,18 +2432,10 @@ main(int argc, char **argv)
if (!wait_set)
{
- switch (ctl_command)
- {
- case RESTART_COMMAND:
- case START_COMMAND:
- do_wait = false;
- break;
- case STOP_COMMAND:
- do_wait = true;
- break;
- default:
- break;
- }
+ if (ctl_command == STOP_COMMAND)
+ do_wait = true;
+ else
+ do_wait = false;
}
if (ctl_command == RELOAD_COMMAND)
diff --git a/src/bin/pg_ctl/t/003_promote.pl b/src/bin/pg_ctl/t/003_promote.pl
index 1461234f2a..0b6090b6eb 100644
--- a/src/bin/pg_ctl/t/003_promote.pl
+++ b/src/bin/pg_ctl/t/003_promote.pl
@@ -3,7 +3,7 @@ use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 9;
+use Test::More tests => 12;
my $tempdir = TestLib::tempdir;
@@ -37,3 +37,19 @@ command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, 'promote' ],
ok($node_standby->poll_query_until('postgres', 'SELECT NOT pg_is_in_recovery()'),
'promoted standby is not in recovery');
+
+# same again with wait option
+$node_standby = get_new_node('standby2');
+$node_standby->init_from_backup($node_primary, 'my_backup', has_streaming => 1);
+$node_standby->start;
+
+is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'),
+ 't', 'standby is in recovery');
+
+command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, '-w', 'promote' ],
+ 'pg_ctl -w promote of standby runs');
+
+# no wait here
+
+is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'),
+ 'f', 'promoted standby is not in recovery');
--
cgit v1.2.3
From 5a7bae0699657315b487896810a30bd7425f6a08 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 23 Sep 2016 14:22:07 -0400
Subject: Doc: fix examples of # operators so they actually work.
These worked as-is until around 7.0, but fail in newer versions because
there are more operators named "#". Besides it's a bit inconsistent that
only two of the examples on this page lack type names on their constants.
Report: <20160923081530.1517.75670@wrigleys.postgresql.org>
---
doc/src/sgml/func.sgml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 47fcb30da0..3cc69bbffd 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -8365,12 +8365,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
#Point or box of intersection
- '((1,-1),(-1,1))' # '((1,1),(-1,-1))'
+ box '((1,-1),(-1,1))' # box '((1,1),(-2,-2))'#Number of points in path or polygon
- # '((1,0),(0,1),(-1,0))'
+ # path '((1,0),(0,1),(-1,0))'@-@
--
cgit v1.2.3
From 98c2d3332b30006ff71add99bc9d619c9457e71f Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 24 Sep 2016 16:25:35 -0400
Subject: Do a final round of updates on the 9.6 release notes.
Set release date, document a few recent commits, do one last pass of
copy-editing.
---
doc/src/sgml/catalogs.sgml | 9 +++--
doc/src/sgml/release-9.6.sgml | 83 +++++++++++++++++++++++++++++++++----------
2 files changed, 71 insertions(+), 21 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 322d8d6dc7..29738b07cb 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -7596,6 +7596,11 @@
application.
+
+ By default, the pg_config view can be read
+ only by superusers.
+
+
pg_config> Columns
@@ -7771,8 +7776,8 @@
- The pg_file_settings view can be read only by
- superusers.
+ By default, the pg_file_settings view can be read
+ only by superusers.
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index ddd280c85a..5c40910c72 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -6,8 +6,7 @@
Release Date
- 2016-??-??
- Current as of 2016-08-27 (commit b9fe6cbc8)
+ 2016-09-29
@@ -56,7 +55,7 @@
Substantial performance improvements, especially in the area of
- scalability on multi-CPU>-socket servers
+ scalability on multi-CPU>-socket servers
@@ -269,7 +268,7 @@ This commit is also listed under libpq and psql
Write
--no-psqlrc
(or its
abbreviation
-X
) explicitly to obtain the old
- behavior. Scripts modified this way will still work with old
+ behavior. Scripts so modified will still work with old
versions of psql>.
@@ -371,6 +370,7 @@ and many others in the same vein
2016-06-09 [c9ce4a1c6] Eliminate "parallel degree" terminology.
2016-06-16 [75be66464] Invent min_parallel_relation_size GUC to replace a hard-
2016-08-16 [f85b1a841] Disable parallel query by default.
+2016-09-15 [72ce78162] Make min_parallel_relation_size's default value platform
-->
Parallel queries (Robert Haas, Amit Kapila, David Rowley,
@@ -504,6 +504,7 @@ and many others in the same vein
Improve sorting performance by using quicksort, not replacement
@@ -693,7 +694,7 @@ and many others in the same vein
(a,b) REFERENCES r (x,y)>, then a WHERE>
condition such as t.a = r.x AND t.b = r.y> cannot
select more than one r> row per t> row.
- The planner formerly considered AND> conditions
+ The planner formerly considered these AND> conditions
to be independent and would often drastically misestimate
selectivity as a result. Now it compares the WHERE>
conditions to applicable foreign key constraints and produces
@@ -731,7 +732,7 @@ and many others in the same vein
containing only already-frozen tuples are identified in the table's
visibility map, and can be skipped by vacuum even when doing
transaction wraparound prevention. This should greatly reduce the
- cost of maintaining large tables containing mostly-unchanged data.
+ cost of maintaining large tables containing mostly-unchanging data.
@@ -872,7 +873,8 @@ and many others in the same vein
from where it will be flushed to physical storage in due time.
Many operating systems are not smart about managing this and allow
large amounts of dirty data to accumulate before deciding to flush
- it all at once, leading to long delays for new I/O requests.
+ it all at once, causing long delays for new I/O requests until the
+ flushing finishes.
This change attempts to alleviate this problem by explicitly
requesting data flushes after a configurable interval.
@@ -1209,7 +1211,7 @@ and many others in the same vein
2016-04-08 [34c33a1f0] Add BSD authentication method.
-->
- Add a bsd> authentication
+ Add a BSD> authentication
method to allow use of
the BSD> Authentication service for
PostgreSQL> client authentication (Marisa Emerson)
@@ -1300,6 +1302,16 @@ and many others in the same vein
+
+ Raise the maximum allowed value
+ of to 24 hours (Simon Riggs)
+
+
+
+
+
@@ -1346,9 +1358,9 @@ and many others in the same vein
Making a distinction between these settings is no longer useful,
- and is part of a planned future simplification of replication
- setup. The old names are still accepted but are converted
- internally.
+ and merging them is a step towards a planned future simplification
+ of replication setup. The old names are still accepted but are
+ converted to replica> internally.
@@ -1375,7 +1387,7 @@ and many others in the same vein
-->
Allow the server's SSL> key file to have group read
- access if owned by root (Christoph Berg)
+ access if it is owned by root> (Christoph Berg)
@@ -1616,7 +1628,7 @@ XXX this is pending backpatch, may need to remove
Previously, such cases failed if the same target column was
mentioned more than once, e.g., INSERT INTO tab (x[1],
- x[2]) VALUES ...>.
+ x[2]) VALUES (...)>.
@@ -1797,9 +1809,9 @@ XXX this is pending backpatch, may need to remove
2016-03-23 [473b93287] Support CREATE ACCESS METHOD
-->
- Introduce CREATE ACCESS METHOD> to allow extensions
- to create index access methods (Alexander Korotkov, Petr
- Jelínek)
+ Introduce CREATE
+ ACCESS METHOD>> to allow extensions to create index access
+ methods (Alexander Korotkov, Petr Jelínek)
@@ -1912,7 +1924,7 @@ XXX this is pending backpatch, may need to remove
Formerly, many security-sensitive functions contained hard-wired
checks that would throw an error if they were called by a
- non-superuser role. This forced the use of superuser roles for
+ non-superuser. This forced the use of superuser roles for
some relatively pedestrian tasks. The hard-wired error checks
are now gone in favor of making initdb> revoke the
default public EXECUTE> privilege on these functions.
@@ -1931,6 +1943,11 @@ XXX this is pending backpatch, may need to remove
that can be used to grant access to what were previously
superuser-only functions (Stephen Frost)
+
+
+ Currently the only such role is pg_signal_backend>,
+ but more are expected to be added in future.
+
@@ -2211,7 +2228,7 @@ XXX this is pending backpatch, may need to remove
Allow ts_stat()>>
and tsvector_update_trigger()>>
to operate on values that are of types binary-compatible with the
- expected argument type, not only that argument type; for example
+ expected argument type, not just exactly that type; for example
allow citext> where text> is expected (Teodor
Sigaev)
@@ -2623,10 +2640,26 @@ This commit is also listed under psql and PL/pgSQL
+
+ Allow pg_dump> to dump non-extension-owned objects
+ that are within an extension-owned schema
+ (Martín Marqués)
+
+
+
+ Previously such objects were ignored because they were mistakenly
+ assumed to belong to the extension owning their schema.
+
+
+
+
+
- In pg_dump>, include the table name in object
+ In pg_dump> output, include the table name in object
tags for object types that are only uniquely named per-table
(for example, triggers) (Peter Eisentraut)
@@ -2912,6 +2945,7 @@ This commit is also listed under libpq and PL/pgSQL
Allow changing the selection probabilities (weights) for scripts
@@ -3011,6 +3045,7 @@ This commit is also listed under libpq and PL/pgSQL
Speed up initdb> by using just one
@@ -3420,6 +3455,16 @@ This commit is also listed under libpq and PL/pgSQL
+
+
+
+ Support OpenSSL 1.1.0 (Andreas Karlsson, Heikki Linnakangas)
+
+
+
--
cgit v1.2.3
From da6c4f6ca88df346573bdada2aa2544510bf167e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 25 Sep 2016 15:40:57 -0400
Subject: Refer to OS X as "macOS", except for the port name which is still
"darwin".
We weren't terribly consistent about whether to call Apple's OS "OS X"
or "Mac OS X", and the former is probably confusing to people who aren't
Apple users. Now that Apple has rebranded it "macOS", follow their lead
to establish a consistent naming pattern. Also, avoid the use of the
ancient project name "Darwin", except as the port code name which does not
seem desirable to change. (In short, this patch touches documentation and
comments, but no actual code.)
I didn't touch contrib/start-scripts/osx/, either. I suspect those are
obsolete and due for a rewrite, anyway.
I dithered about whether to apply this edit to old release notes, but
those were responsible for quite a lot of the inconsistencies, so I ended
up changing them too. Anyway, Apple's being ahistorical about this,
so why shouldn't we be?
---
config/c-library.m4 | 4 ++--
configure | 8 ++++----
configure.in | 12 ++++++------
doc/src/sgml/client-auth.sgml | 2 +-
doc/src/sgml/dfunc.sgml | 4 ++--
doc/src/sgml/docguide.sgml | 2 +-
doc/src/sgml/install-windows.sgml | 2 +-
doc/src/sgml/installation.sgml | 6 +++---
doc/src/sgml/monitoring.sgml | 2 +-
doc/src/sgml/release-7.4.sgml | 14 +++++++-------
doc/src/sgml/release-8.0.sgml | 10 +++++-----
doc/src/sgml/release-8.1.sgml | 10 +++++-----
doc/src/sgml/release-8.2.sgml | 4 ++--
doc/src/sgml/release-8.3.sgml | 4 ++--
doc/src/sgml/release-8.4.sgml | 6 +++---
doc/src/sgml/release-9.0.sgml | 8 ++++----
doc/src/sgml/release-9.1.sgml | 8 ++++----
doc/src/sgml/release-9.2.sgml | 6 +++---
doc/src/sgml/release-9.3.sgml | 10 +++++-----
doc/src/sgml/release-9.4.sgml | 6 +++---
doc/src/sgml/release-9.5.sgml | 2 +-
doc/src/sgml/release-old.sgml | 2 +-
doc/src/sgml/runtime.sgml | 18 +++++++++---------
doc/src/sgml/uuid-ossp.sgml | 2 +-
doc/src/sgml/wal.sgml | 2 +-
src/Makefile.shlib | 2 +-
src/backend/Makefile | 4 ++--
src/backend/port/dynloader/darwin.c | 2 +-
src/backend/postmaster/postmaster.c | 2 +-
src/backend/utils/adt/varlena.c | 4 ++--
src/backend/utils/misc/ps_status.c | 4 ++--
src/backend/utils/probes.d | 2 +-
src/bin/psql/input.c | 2 +-
src/include/port/darwin.h | 2 +-
src/interfaces/libpq/fe-connect.c | 2 +-
src/port/README | 2 +-
src/port/chklocale.c | 2 +-
src/template/darwin | 11 +++++++----
src/test/regress/pg_regress.c | 4 ++--
src/tools/find_typedef | 2 +-
40 files changed, 102 insertions(+), 99 deletions(-)
(limited to 'doc/src')
diff --git a/config/c-library.m4 b/config/c-library.m4
index 50d068d3fb..d330b0cf95 100644
--- a/config/c-library.m4
+++ b/config/c-library.m4
@@ -292,8 +292,8 @@ AC_MSG_RESULT([$pgac_cv_snprintf_size_t_support])
# PGAC_TYPE_LOCALE_T
# ------------------
-# Check for the locale_t type and find the right header file. Mac OS
-# X needs xlocale.h; standard is locale.h, but glibc also has an
+# Check for the locale_t type and find the right header file. macOS
+# needs xlocale.h; standard is locale.h, but glibc also has an
# xlocale.h file that we should not use.
#
AC_DEFUN([PGAC_TYPE_LOCALE_T],
diff --git a/configure b/configure
index 5fc8c442a2..55c771a11e 100755
--- a/configure
+++ b/configure
@@ -7658,9 +7658,9 @@ $as_echo "${python_libspec} ${python_additional_libs}" >&6; }
if test "$python_enable_shared" != 1; then
if test "$PORTNAME" = darwin; then
- # OS X does supply a .dylib even though Py_ENABLE_SHARED does
+ # macOS does supply a .dylib even though Py_ENABLE_SHARED does
# not get set. The file detection logic below doesn't succeed
- # on older OS X versions, so make it explicit.
+ # on older macOS versions, so make it explicit.
python_enable_shared=1
elif test "$PORTNAME" = win32; then
# Windows also needs an explicit override.
@@ -10120,7 +10120,7 @@ else
fi
elif test "$with_uuid" = e2fs ; then
- # On OS X, the UUID functions are in libc
+ # On macOS, the UUID functions are in libc
ac_fn_c_check_func "$LINENO" "uuid_generate" "ac_cv_func_uuid_generate"
if test "x$ac_cv_func_uuid_generate" = xyes; then :
UUID_LIBS=""
@@ -12672,7 +12672,7 @@ cat >>confdefs.h <<_ACEOF
#define HAVE_DECL_STRLCPY $ac_have_decl
_ACEOF
-# This is probably only present on Darwin, but may as well check always
+# This is probably only present on macOS, but may as well check always
ac_fn_c_check_decl "$LINENO" "F_FULLFSYNC" "ac_cv_have_decl_F_FULLFSYNC" "#include
"
if test "x$ac_cv_have_decl_F_FULLFSYNC" = xyes; then :
diff --git a/configure.in b/configure.in
index 96d865de9f..9850d993ff 100644
--- a/configure.in
+++ b/configure.in
@@ -943,9 +943,9 @@ if test "$with_python" = yes; then
if test "$python_enable_shared" != 1; then
if test "$PORTNAME" = darwin; then
- # OS X does supply a .dylib even though Py_ENABLE_SHARED does
+ # macOS does supply a .dylib even though Py_ENABLE_SHARED does
# not get set. The file detection logic below doesn't succeed
- # on older OS X versions, so make it explicit.
+ # on older macOS versions, so make it explicit.
python_enable_shared=1
elif test "$PORTNAME" = win32; then
# Windows also needs an explicit override.
@@ -1182,7 +1182,7 @@ if test "$with_uuid" = bsd ; then
[UUID_LIBS=""],
[AC_MSG_ERROR([BSD UUID functions are not present])])
elif test "$with_uuid" = e2fs ; then
- # On OS X, the UUID functions are in libc
+ # On macOS, the UUID functions are in libc
AC_CHECK_FUNC(uuid_generate,
[UUID_LIBS=""],
[AC_CHECK_LIB(uuid, uuid_generate,
@@ -1425,8 +1425,8 @@ esac
if test "$PORTNAME" != "win32"; then
AC_SYS_LARGEFILE
dnl Autoconf 2.69's AC_SYS_LARGEFILE believes it's a good idea to #define
- dnl _DARWIN_USE_64_BIT_INODE, but it isn't: on OS X 10.5 that activates a
- dnl bug that causes readdir() to sometimes return EINVAL. On later OS X
+ dnl _DARWIN_USE_64_BIT_INODE, but it isn't: on macOS 10.5 that activates a
+ dnl bug that causes readdir() to sometimes return EINVAL. On later macOS
dnl versions where the feature actually works, it's on by default anyway.
AH_VERBATIM([_DARWIN_USE_64_BIT_INODE],[])
fi
@@ -1479,7 +1479,7 @@ fi
AC_CHECK_DECLS(fdatasync, [], [], [#include ])
AC_CHECK_DECLS([strlcat, strlcpy])
-# This is probably only present on Darwin, but may as well check always
+# This is probably only present on macOS, but may as well check always
AC_CHECK_DECLS(F_FULLFSYNC, [], [], [#include ])
HAVE_IPV6=no
diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml
index ca262d9452..a0d97ffbac 100644
--- a/doc/src/sgml/client-auth.sgml
+++ b/doc/src/sgml/client-auth.sgml
@@ -1306,7 +1306,7 @@ omicron bryanh guest1
socket parameter, or similar mechanisms. Currently that includes
Linux>,
most flavors of BSD> including
- OS X>,
+ macOS>,
and Solaris.
diff --git a/doc/src/sgml/dfunc.sgml b/doc/src/sgml/dfunc.sgml
index 5a368f6df0..ba2684cc3c 100644
--- a/doc/src/sgml/dfunc.sgml
+++ b/doc/src/sgml/dfunc.sgml
@@ -127,8 +127,8 @@ cc -shared -o foo.so foo.o
- OS X>
- OS X>shared library>>
+ macOS>
+ macOS>shared library>>
diff --git a/doc/src/sgml/docguide.sgml b/doc/src/sgml/docguide.sgml
index 6f896b565f..48828aff37 100644
--- a/doc/src/sgml/docguide.sgml
+++ b/doc/src/sgml/docguide.sgml
@@ -275,7 +275,7 @@ apt-get install docbook docbook-dsssl docbook-xsl libxml2-utils openjade1.3 open
- OS X
+ macOS
If you use MacPorts, the following will get you set up:
diff --git a/doc/src/sgml/install-windows.sgml b/doc/src/sgml/install-windows.sgml
index 50116f315d..20fc47ae5f 100644
--- a/doc/src/sgml/install-windows.sgml
+++ b/doc/src/sgml/install-windows.sgml
@@ -51,7 +51,7 @@
MinGW-w64. These tools can also be used to
cross-compile for 32 bit and 64 bit Windows
targets on other hosts, such as Linux and
- Darwin.
+ macOS.
Cygwin is not recommended for running a
production server, and it should only be used for running on
older versions of Windows where
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index f6de18ed2d..5ee28fcf85 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -874,7 +874,7 @@ su - postgres
Build with Bonjour support. This requires Bonjour support
- in your operating system. Recommended on OS X.
+ in your operating system. Recommended on macOS.
@@ -900,7 +900,7 @@ su - postgres
e2fs> to use the UUID library created by
the e2fsprogs> project; this library is present in most
- Linux systems and in OS X, and can be obtained for other
+ Linux systems and in macOS, and can be obtained for other
platforms as well
@@ -2010,7 +2010,7 @@ kill `cat /usr/local/pgsql/data/postmaster.pid`
PostgreSQL> can be expected to work on these operating
systems: Linux (all recent distributions), Windows (Win2000 SP4 and later),
- FreeBSD, OpenBSD, NetBSD, OS X, AIX, HP/UX, Solaris,
+ FreeBSD, OpenBSD, NetBSD, macOS, AIX, HP/UX, Solaris,
and UnixWare. Other Unix-like systems may also work but are not currently
being tested. In most cases, all CPU architectures supported by
a given operating system will work. Look in
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 077642878e..f400785721 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2739,7 +2739,7 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
Currently, the
DTrace
utility is supported, which, at the time of this writing, is available
- on Solaris, OS X, FreeBSD, NetBSD, and Oracle Linux. The
+ on Solaris, macOS, FreeBSD, NetBSD, and Oracle Linux. The
SystemTap project
for Linux provides a DTrace equivalent and can also be used. Supporting other dynamic
tracing utilities is theoretically possible by changing the definitions for
diff --git a/doc/src/sgml/release-7.4.sgml b/doc/src/sgml/release-7.4.sgml
index 5a4c52d4c2..e42be5b89d 100644
--- a/doc/src/sgml/release-7.4.sgml
+++ b/doc/src/sgml/release-7.4.sgml
@@ -268,7 +268,7 @@
- This behavior has been observed on BSD-derived kernels including OS X.
+ This behavior has been observed on BSD-derived kernels including macOS.
It resulted in an entirely-misleading startup failure complaining that
the shared memory request size was too large.
@@ -2437,7 +2437,7 @@ aggregate plan
Pretty-print UNION queries correctly
Make psql handle \r\n> newlines properly in COPY IN
pg_dump> handled ACLs with grant options incorrectly
-Fix thread support for OS X and Solaris
+Fix thread support for macOS and Solaris
Updated JDBC driver (build 215) with various fixes
ECPG fixes
Translation updates (various contributors)
@@ -2627,7 +2627,7 @@ memory> error during COPY IN>
TABLE AS> from tables without OIDs
Fix problems with alter_table> regression test
during parallel testing
-Fix problems with hitting open file limit, especially on OS X (Tom)
+Fix problems with hitting open file limit, especially on macOS (Tom)
Partial fix for Turkish-locale issues
initdb will succeed now in Turkish locale, but there are still some
inconveniences associated with the i/I> problem.
@@ -3256,7 +3256,7 @@ DROP SCHEMA information_schema CASCADE;
- Enable PAM for Mac OS X (Aaron Hillegass)
+ Enable PAM for macOS (Aaron Hillegass)
Make B-tree indexes fully WAL-safe (Tom)
@@ -3539,9 +3539,9 @@ DROP SCHEMA information_schema CASCADE;
- Add Mac OS X Rendezvous server support (Chris Campbell)
+ Add macOS Rendezvous server support (Chris Campbell)
- This allows Mac OS X hosts to query the network for available
+ This allows macOS hosts to query the network for available
PostgreSQL servers.
@@ -4561,7 +4561,7 @@ DROP SCHEMA information_schema CASCADE;
Fix locking code for s390x CPU (64-bit) (Tom)
Allow OpenBSD to use local ident credentials (William Ahern)
Make query plan trees read-only to executor (Tom)
- Add Darwin startup scripts (David Wheeler)
+ Add macOS startup scripts (David Wheeler)
Allow libpq to compile with Borland C++ compiler (Lester Godwin, Karl Waclawek)
Use our own version of getopt_long() if needed (Peter)
Convert administration scripts to C (Peter)
diff --git a/doc/src/sgml/release-8.0.sgml b/doc/src/sgml/release-8.0.sgml
index 299c34e0f0..becd5090cc 100644
--- a/doc/src/sgml/release-8.0.sgml
+++ b/doc/src/sgml/release-8.0.sgml
@@ -345,7 +345,7 @@
- This behavior has been observed on BSD-derived kernels including OS X.
+ This behavior has been observed on BSD-derived kernels including macOS.
It resulted in an entirely-misleading startup failure complaining that
the shared memory request size was too large.
@@ -1715,7 +1715,7 @@
While this could theoretically happen anywhere, no standard build of
- Perl did things this way ... until Mac OS X> 10.5.
+ Perl did things this way ... until macOS> 10.5.
@@ -2449,7 +2449,7 @@ Win32 to match the backend (Andrew)
(Bruce)
Fix pgxs> -L> library path
-specification for Win32, Cygwin, OS X, AIX (Bruce)
+specification for Win32, Cygwin, macOS, AIX (Bruce)
Check that SID is enabled while checking for Win32 admin
privileges (Magnus)
@@ -5224,7 +5224,7 @@ typedefs (Michael)
- Improvements to the Mac OS X startup scripts (Ray A.)
+ Improvements to the macOS startup scripts (Ray A.)
@@ -5328,7 +5328,7 @@ typedefs (Michael)
- Make libpq and ECPG build as proper shared libraries on OS X (Tom)
+ Make libpq and ECPG build as proper shared libraries on macOS (Tom)
diff --git a/doc/src/sgml/release-8.1.sgml b/doc/src/sgml/release-8.1.sgml
index 0cb5587e9b..05b07ade99 100644
--- a/doc/src/sgml/release-8.1.sgml
+++ b/doc/src/sgml/release-8.1.sgml
@@ -572,7 +572,7 @@
- This behavior has been observed on BSD-derived kernels including OS X.
+ This behavior has been observed on BSD-derived kernels including macOS.
It resulted in an entirely-misleading startup failure complaining that
the shared memory request size was too large.
@@ -2188,7 +2188,7 @@
While this could theoretically happen anywhere, no standard build of
- Perl did things this way ... until Mac OS X> 10.5.
+ Perl did things this way ... until macOS> 10.5.
@@ -2730,7 +2730,7 @@
- Fix for Darwin (OS X) compilation (Tom)
+ Fix for macOS (Darwin) compilation (Tom)
@@ -3104,7 +3104,7 @@ Win32 to match the backend (Andrew)
(Bruce)
Fix pgxs> -L> library path
-specification for Win32, Cygwin, OS X, AIX (Bruce)
+specification for Win32, Cygwin, macOS, AIX (Bruce)
Check that SID is enabled while checking for Win32 admin
privileges (Magnus)
@@ -5225,7 +5225,7 @@ SELECT CURRENT_TIMESTAMP AT TIME ZONE 'Europe/London';
Add support for fsync_writethrough on
- Darwin (Chris Campbell)
+ macOS (Chris Campbell)
diff --git a/doc/src/sgml/release-8.2.sgml b/doc/src/sgml/release-8.2.sgml
index 7f6a74bac9..2d21728cf7 100644
--- a/doc/src/sgml/release-8.2.sgml
+++ b/doc/src/sgml/release-8.2.sgml
@@ -1487,7 +1487,7 @@
- This behavior has been observed on BSD-derived kernels including OS X.
+ This behavior has been observed on BSD-derived kernels including macOS.
It resulted in an entirely-misleading startup failure complaining that
the shared memory request size was too large.
@@ -3765,7 +3765,7 @@
While this could theoretically happen anywhere, no standard build of
- Perl did things this way ... until Mac OS X> 10.5.
+ Perl did things this way ... until macOS> 10.5.
diff --git a/doc/src/sgml/release-8.3.sgml b/doc/src/sgml/release-8.3.sgml
index b56edb0102..b1b5d4875c 100644
--- a/doc/src/sgml/release-8.3.sgml
+++ b/doc/src/sgml/release-8.3.sgml
@@ -3075,7 +3075,7 @@
- This behavior has been observed on BSD-derived kernels including OS X.
+ This behavior has been observed on BSD-derived kernels including macOS.
It resulted in an entirely-misleading startup failure complaining that
the shared memory request size was too large.
@@ -8396,7 +8396,7 @@ current_date < 2017-11-17
Use SYSV> semaphores rather than POSIX on Darwin
- >= 6.0, i.e., OS X 10.2 and up (Chris Marcellino)
+ >= 6.0, i.e., macOS 10.2 and up (Chris Marcellino)
diff --git a/doc/src/sgml/release-8.4.sgml b/doc/src/sgml/release-8.4.sgml
index 8b16c9e9d3..0d0478855e 100644
--- a/doc/src/sgml/release-8.4.sgml
+++ b/doc/src/sgml/release-8.4.sgml
@@ -240,7 +240,7 @@
- Fix linking of libpython> on OS X (Tom Lane)
+ Fix linking of libpython> on macOS (Tom Lane)
@@ -5334,7 +5334,7 @@
- This behavior has been observed on BSD-derived kernels including OS X.
+ This behavior has been observed on BSD-derived kernels including macOS.
It resulted in an entirely-misleading startup failure complaining that
the shared memory request size was too large.
@@ -9764,7 +9764,7 @@ WITH w AS (SELECT * FROM foo) SELECT * FROM w, bar ... FOR UPDATE
- Enable DTrace> support on Mac OS X
+ Enable DTrace> support on macOS
Leopard> and other non-Solaris platforms (Robert Lor)
diff --git a/doc/src/sgml/release-9.0.sgml b/doc/src/sgml/release-9.0.sgml
index 61dce9fd78..2238b53745 100644
--- a/doc/src/sgml/release-9.0.sgml
+++ b/doc/src/sgml/release-9.0.sgml
@@ -1541,7 +1541,7 @@
- Warn if OS X's setlocale()> starts an unwanted extra
+ Warn if macOS's setlocale()> starts an unwanted extra
thread inside the postmaster (Noah Misch)
@@ -2093,7 +2093,7 @@
- Fix linking of libpython> on OS X (Tom Lane)
+ Fix linking of libpython> on macOS (Tom Lane)
@@ -5895,7 +5895,7 @@
- Fix incorrect quoting of log file name in Mac OS X start script
+ Fix incorrect quoting of log file name in macOS start script
(Sidar Lopez)
@@ -10745,7 +10745,7 @@ if TG_OP = 'INSERT' and NEW.col1 = ... then
- Bonjour support now requires OS X> 10.3 or later.
+ Bonjour support now requires macOS> 10.3 or later.
The older API has been deprecated by Apple.
diff --git a/doc/src/sgml/release-9.1.sgml b/doc/src/sgml/release-9.1.sgml
index a66ca0d5b3..26b709056f 100644
--- a/doc/src/sgml/release-9.1.sgml
+++ b/doc/src/sgml/release-9.1.sgml
@@ -599,7 +599,7 @@ Branch: REL9_1_STABLE [354b3a3ac] 2016-06-19 14:01:17 -0400
This dodges a portability problem on FreeBSD-derived platforms
- (including OS X).
+ (including macOS).
@@ -2937,7 +2937,7 @@ Branch: REL9_0_STABLE [9d6af7367] 2015-08-15 11:02:34 -0400
- Warn if OS X's setlocale()> starts an unwanted extra
+ Warn if macOS's setlocale()> starts an unwanted extra
thread inside the postmaster (Noah Misch)
@@ -3574,7 +3574,7 @@ Branch: REL9_0_STABLE [9d6af7367] 2015-08-15 11:02:34 -0400
- Fix linking of libpython> on OS X (Tom Lane)
+ Fix linking of libpython> on macOS (Tom Lane)
@@ -8443,7 +8443,7 @@ Branch: REL9_0_STABLE [9d6af7367] 2015-08-15 11:02:34 -0400
- Fix incorrect quoting of log file name in Mac OS X start script
+ Fix incorrect quoting of log file name in macOS start script
(Sidar Lopez)
diff --git a/doc/src/sgml/release-9.2.sgml b/doc/src/sgml/release-9.2.sgml
index c801f98c3f..0f6e3d127f 100644
--- a/doc/src/sgml/release-9.2.sgml
+++ b/doc/src/sgml/release-9.2.sgml
@@ -629,7 +629,7 @@
This dodges a portability problem on FreeBSD-derived platforms
- (including OS X).
+ (including macOS).
@@ -3190,7 +3190,7 @@ Branch: REL9_2_STABLE [6b700301c] 2015-02-17 16:03:00 +0100
- Warn if OS X's setlocale()> starts an unwanted extra
+ Warn if macOS's setlocale()> starts an unwanted extra
thread inside the postmaster (Noah Misch)
@@ -3899,7 +3899,7 @@ Branch: REL9_2_STABLE [6b700301c] 2015-02-17 16:03:00 +0100
- Fix linking of libpython> on OS X (Tom Lane)
+ Fix linking of libpython> on macOS (Tom Lane)
diff --git a/doc/src/sgml/release-9.3.sgml b/doc/src/sgml/release-9.3.sgml
index c75f1109e1..e321f4b31c 100644
--- a/doc/src/sgml/release-9.3.sgml
+++ b/doc/src/sgml/release-9.3.sgml
@@ -812,7 +812,7 @@ Branch: REL9_2_STABLE [37f30b251] 2016-04-18 13:19:52 -0400
This dodges a portability problem on FreeBSD-derived platforms
- (including OS X).
+ (including macOS).
@@ -3021,7 +3021,7 @@ Branch: REL9_0_STABLE [4dddf8552] 2015-05-21 20:41:55 -0400
- Silence some build warnings on OS X (Tom Lane)
+ Silence some build warnings on macOS (Tom Lane)
@@ -4092,7 +4092,7 @@ Branch: REL9_0_STABLE [2e4946169] 2015-01-07 22:46:20 -0500
- Warn if OS X's setlocale()> starts an unwanted extra
+ Warn if macOS's setlocale()> starts an unwanted extra
thread inside the postmaster (Noah Misch)
@@ -5743,7 +5743,7 @@ Branch: REL8_4_STABLE [ae41bb4be] 2014-05-30 18:18:32 -0400
- Fix linking of libpython> on OS X (Tom Lane)
+ Fix linking of libpython> on macOS (Tom Lane)
@@ -10710,7 +10710,7 @@ ALTER EXTENSION hstore UPDATE;
Add instructions for setting
- up the documentation tool chain on Mac OS X>
+ up the documentation tool chain on macOS>
(Peter Eisentraut)
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml
index 443c772846..51896924c9 100644
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -929,7 +929,7 @@ Branch: REL9_1_STABLE [de887cc8a] 2016-05-25 19:39:49 -0400
This dodges a portability problem on FreeBSD-derived platforms
- (including OS X).
+ (including macOS).
@@ -5254,7 +5254,7 @@ Branch: REL9_3_STABLE [6347bdb31] 2015-04-05 13:01:55 -0400
- Silence some build warnings on OS X (Tom Lane)
+ Silence some build warnings on macOS (Tom Lane)
@@ -5813,7 +5813,7 @@ Branch: REL9_0_STABLE [2e4946169] 2015-01-07 22:46:20 -0500
- Warn if OS X's setlocale()> starts an unwanted extra
+ Warn if macOS's setlocale()> starts an unwanted extra
thread inside the postmaster (Noah Misch)
diff --git a/doc/src/sgml/release-9.5.sgml b/doc/src/sgml/release-9.5.sgml
index fa3537de10..c3f0f7051e 100644
--- a/doc/src/sgml/release-9.5.sgml
+++ b/doc/src/sgml/release-9.5.sgml
@@ -2023,7 +2023,7 @@ Branch: REL9_1_STABLE [e56acbe2a] 2016-02-10 19:30:12 -0500
This dodges a portability problem on FreeBSD-derived platforms
- (including OS X).
+ (including macOS).
diff --git a/doc/src/sgml/release-old.sgml b/doc/src/sgml/release-old.sgml
index ec8e43f6ea..cd9b3db35a 100644
--- a/doc/src/sgml/release-old.sgml
+++ b/doc/src/sgml/release-old.sgml
@@ -3299,7 +3299,7 @@ New BeOS port (David Reid, Cyril Velter)
Add proofreader's changes to docs (Addison-Wesley, Bruce)
New Alpha spinlock code (Adriaan Joubert, Compaq)
UnixWare port overhaul (Peter E)
-New Darwin/Mac OS X port (Peter Bierman, Bruce Hartzler)
+New macOS (Darwin) port (Peter Bierman, Bruce Hartzler)
New FreeBSD Alpha port (Alfred)
Overhaul shared memory segments (Tom)
Add IBM S/390 support (Neale Ferguson)
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index ef0139c365..88ec120841 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1006,12 +1006,12 @@ option SEMMAP=256
- OS X>
- OS X>IPC configuration>>
+ macOS>
+ macOS>IPC configuration>>
- The recommended method for configuring shared memory in OS X
+ The recommended method for configuring shared memory in macOS
is to create a file named /etc/sysctl.conf>,
containing variable assignments such as:
@@ -1021,13 +1021,13 @@ kern.sysv.shmmni=32
kern.sysv.shmseg=8
kern.sysv.shmall=1024
- Note that in some OS X versions,
+ Note that in some macOS versions,
all five> shared-memory parameters must be set in
/etc/sysctl.conf>, else the values will be ignored.
- Beware that recent releases of OS X ignore attempts to set
+ Beware that recent releases of macOS ignore attempts to set
SHMMAX> to a value that isn't an exact multiple of 4096.
@@ -1036,7 +1036,7 @@ kern.sysv.shmall=1024
- In older OS X versions, you will need to reboot to have changes in the
+ In older macOS versions, you will need to reboot to have changes in the
shared memory parameters take effect. As of 10.5 it is possible to
change all but SHMMNI> on the fly, using
sysctl>. But it's still best to set up your preferred
@@ -1045,7 +1045,7 @@ kern.sysv.shmall=1024
- The file /etc/sysctl.conf> is only honored in OS X
+ The file /etc/sysctl.conf> is only honored in macOS
10.3.9 and later. If you are running a previous 10.3.x release,
you must edit the file /etc/rc>
and change the values in the following commands:
@@ -1057,12 +1057,12 @@ sysctl -w kern.sysv.shmseg
sysctl -w kern.sysv.shmall
Note that
- /etc/rc> is usually overwritten by OS X system updates,
+ /etc/rc> is usually overwritten by macOS system updates,
so you should expect to have to redo these edits after each update.
- In OS X 10.2 and earlier, instead edit these commands in the file
+ In macOS 10.2 and earlier, instead edit these commands in the file
/System/Library/StartupItems/SystemTuning/SystemTuning>.
diff --git a/doc/src/sgml/uuid-ossp.sgml b/doc/src/sgml/uuid-ossp.sgml
index e275febe4e..227d4a839c 100644
--- a/doc/src/sgml/uuid-ossp.sgml
+++ b/doc/src/sgml/uuid-ossp.sgml
@@ -169,7 +169,7 @@ SELECT uuid_generate_v3(uuid_ns_url(), 'http://www.postgresql.org');
platforms. uuid-ossp> can now be built without the OSSP
library on some platforms. On FreeBSD, NetBSD, and some other BSD-derived
platforms, suitable UUID creation functions are included in the
- core libc> library. On Linux, OS X, and some other
+ core libc> library. On Linux, macOS, and some other
platforms, suitable functions are provided in the libuuid>
library, which originally came from the e2fsprogs> project
(though on modern Linux it is considered part
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 9ae6547721..fe3b588c72 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -115,7 +115,7 @@
- On OS X, write caching can be prevented by
+ On macOS, write caching can be prevented by
setting wal_sync_method> to fsync_writethrough>.
diff --git a/src/Makefile.shlib b/src/Makefile.shlib
index 924d21f443..de93f41639 100644
--- a/src/Makefile.shlib
+++ b/src/Makefile.shlib
@@ -323,7 +323,7 @@ endif
endif # shlib_major
# Where possible, restrict the symbols exported by the library to just the
-# official list, so as to avoid unintentional ABI changes. On recent Darwin
+# official list, so as to avoid unintentional ABI changes. On recent macOS
# this also quiets multiply-defined-symbol warnings in programs that use
# libpgport along with libpq.
ifneq (,$(SHLIB_EXPORTS))
diff --git a/src/backend/Makefile b/src/backend/Makefile
index 3b08defe2b..4946d37929 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -26,8 +26,8 @@ include $(srcdir)/common.mk
# As of 1/2010:
# The probes.o file is necessary for dtrace support on Solaris, and on recent
# versions of systemtap. (Older systemtap releases just produce an empty
-# file, but that's okay.) However, OS X's dtrace doesn't use it and doesn't
-# even recognize the -G option. So, build probes.o except on Darwin.
+# file, but that's okay.) However, macOS's dtrace doesn't use it and doesn't
+# even recognize the -G option. So, build probes.o except on macOS.
# This might need adjustment as other platforms add dtrace support.
ifneq ($(PORTNAME), darwin)
ifeq ($(enable_dtrace), yes)
diff --git a/src/backend/port/dynloader/darwin.c b/src/backend/port/dynloader/darwin.c
index a83c614f4f..7b6b48d14a 100644
--- a/src/backend/port/dynloader/darwin.c
+++ b/src/backend/port/dynloader/darwin.c
@@ -1,5 +1,5 @@
/*
- * Dynamic loading support for Darwin
+ * Dynamic loading support for macOS (Darwin)
*
* If dlopen() is available (Darwin 10.3 and later), we just use it.
* Otherwise we emulate it with the older, now deprecated, NSLinkModule API.
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 40995580af..0c0a609735 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1273,7 +1273,7 @@ PostmasterMain(int argc, char *argv[])
#ifdef HAVE_PTHREAD_IS_THREADED_NP
/*
- * On Darwin, libintl replaces setlocale() with a version that calls
+ * On macOS, libintl replaces setlocale() with a version that calls
* CFLocaleCopyCurrent() when its second argument is "" and every relevant
* environment variable is unset or empty. CFLocaleCopyCurrent() makes
* the process multithreaded. The postmaster calls sigprocmask() and
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 582d3e460b..260a5aac49 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1844,8 +1844,8 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
* Even apart from the risk of broken locales, it's possible that there
* are platforms where the use of abbreviated keys should be disabled at
* compile time. Having only 4 byte datums could make worst-case
- * performance drastically more likely, for example. Moreover, Darwin's
- * strxfrm() implementations is known to not effectively concentrate a
+ * performance drastically more likely, for example. Moreover, macOS's
+ * strxfrm() implementation is known to not effectively concentrate a
* significant amount of entropy from the original string in earlier
* transformed blobs. It's possible that other supported platforms are
* similarly encumbered. So, if we ever get past disabling this
diff --git a/src/backend/utils/misc/ps_status.c b/src/backend/utils/misc/ps_status.c
index c50be8aab6..a889b170c8 100644
--- a/src/backend/utils/misc/ps_status.c
+++ b/src/backend/utils/misc/ps_status.c
@@ -223,8 +223,8 @@ save_ps_display_args(int argc, char **argv)
#if defined(__darwin__)
/*
- * Darwin (and perhaps other NeXT-derived platforms?) has a static
- * copy of the argv pointer, which we may fix like so:
+ * macOS (and perhaps other NeXT-derived platforms?) has a static copy
+ * of the argv pointer, which we may fix like so:
*/
*_NSGetArgv() = new_argv;
#endif
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index 976774e795..2f92dfa9ad 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -12,7 +12,7 @@
* Typedefs used in PostgreSQL.
*
* NOTE: Do not use system-provided typedefs (e.g. uintptr_t, uint32_t, etc)
- * in probe definitions, as they cause compilation errors on Mac OS X 10.5.
+ * in probe definitions, as they cause compilation errors on macOS 10.5.
*/
#define LocalTransactionId unsigned int
#define LWLockMode int
diff --git a/src/bin/psql/input.c b/src/bin/psql/input.c
index 2359b11dcd..a7d017a2d5 100644
--- a/src/bin/psql/input.c
+++ b/src/bin/psql/input.c
@@ -411,7 +411,7 @@ saveHistory(char *fname, int max_lines)
/*
* Suppressing the write attempt when HISTFILE is set to /dev/null may
- * look like a negligible optimization, but it's necessary on e.g. Darwin,
+ * look like a negligible optimization, but it's necessary on e.g. macOS,
* where write_history will fail because it tries to chmod the target
* file.
*/
diff --git a/src/include/port/darwin.h b/src/include/port/darwin.h
index 29c4b91d8c..15fb69d6db 100644
--- a/src/include/port/darwin.h
+++ b/src/include/port/darwin.h
@@ -2,7 +2,7 @@
#define __darwin__ 1
-#if HAVE_DECL_F_FULLFSYNC /* not present before OS X 10.3 */
+#if HAVE_DECL_F_FULLFSYNC /* not present before macOS 10.3 */
#define HAVE_FSYNC_WRITETHROUGH
#endif
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 9668b52103..f3a9e5a83f 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -1252,7 +1252,7 @@ setKeepalivesIdle(PGconn *conn)
}
#else
#ifdef TCP_KEEPALIVE
- /* Darwin uses TCP_KEEPALIVE rather than TCP_KEEPIDLE */
+ /* macOS uses TCP_KEEPALIVE rather than TCP_KEEPIDLE */
if (setsockopt(conn->sock, IPPROTO_TCP, TCP_KEEPALIVE,
(char *) &idle, sizeof(idle)) < 0)
{
diff --git a/src/port/README b/src/port/README
index 58fb32d9f9..4ae96da015 100644
--- a/src/port/README
+++ b/src/port/README
@@ -28,5 +28,5 @@ applications.
from libpgport are linked first. This avoids having applications
dependent on symbols that are _used_ by libpq, but not intended to be
exported by libpq. libpq's libpgport usage changes over time, so such a
-dependency is a problem. Win32, Linux, and Darwin use an export list to
+dependency is a problem. Windows, Linux, and macOS use an export list to
control the symbols exported by libpq.
diff --git a/src/port/chklocale.c b/src/port/chklocale.c
index 3c0ef6a253..915821a4e9 100644
--- a/src/port/chklocale.c
+++ b/src/port/chklocale.c
@@ -395,7 +395,7 @@ pg_get_encoding_from_locale(const char *ctype, bool write_message)
#ifdef __darwin__
/*
- * Current OS X has many locales that report an empty string for CODESET,
+ * Current macOS has many locales that report an empty string for CODESET,
* but they all seem to actually use UTF-8.
*/
if (strlen(sys) == 0)
diff --git a/src/template/darwin b/src/template/darwin
index 542f706b0f..ea6d3b0b04 100644
--- a/src/template/darwin
+++ b/src/template/darwin
@@ -1,9 +1,12 @@
# src/template/darwin
-# Select appropriate semaphore support. Darwin 6.0 (Mac OS X 10.2) and up
-# support System V semaphores; before that we have to use POSIX semaphores,
-# which are less good for our purposes because they eat a file descriptor
-# per backend per max_connection slot.
+# Note: Darwin is the original code name for macOS, also known as OS X.
+# We still use "darwin" as the port name, partly because config.guess does.
+
+# Select appropriate semaphore support. Darwin 6.0 (macOS 10.2) and up
+# support System V semaphores; before that we have to use named POSIX
+# semaphores, which are less good for our purposes because they eat a
+# file descriptor per backend per max_connection slot.
case $host_os in
darwin[015].*)
USE_NAMED_POSIX_SEMAPHORES=1
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index 1154d4c300..b28cb0b1e1 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -738,9 +738,9 @@ initialize_environment(void)
/*
* Most platforms have adopted the POSIX locale as their
* implementation-defined default locale. Exceptions include native
- * Windows, Darwin with --enable-nls, and Cygwin with --enable-nls.
+ * Windows, macOS with --enable-nls, and Cygwin with --enable-nls.
* (Use of --enable-nls matters because libintl replaces setlocale().)
- * Also, PostgreSQL does not support Darwin with locale environment
+ * Also, PostgreSQL does not support macOS with locale environment
* variables unset; see PostmasterMain().
*/
#if defined(WIN32) || defined(__CYGWIN__) || defined(__darwin__)
diff --git a/src/tools/find_typedef b/src/tools/find_typedef
index fee0fb5152..24e9b76651 100755
--- a/src/tools/find_typedef
+++ b/src/tools/find_typedef
@@ -13,7 +13,7 @@
# find both .o files and executables. Therefore, ignore error messages about
# unsuitable files being fed to objdump.
#
-# This is known to work on Linux and on some BSDen, including Mac OS X.
+# This is known to work on Linux and on some BSDen, including macOS.
#
# Caution: on the platforms we use, this only prints typedefs that are used
# to declare at least one variable or struct field. If you have say
--
cgit v1.2.3
From a4afb2b5c0b409bb175c20104b2ae9d47cf71be6 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 26 Sep 2016 11:50:35 -0400
Subject: Document has_type_privilege().
Evidently an oversight in commit 729205571. Back-patch to 9.2 where
privileges for types were introduced.
Report: <20160922173517.8214.88959@wrigleys.postgresql.org>
---
doc/src/sgml/func.sgml | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 3cc69bbffd..67de029c6a 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -15897,6 +15897,21 @@ SET search_path TO schema> , schema>, ..
boolean
does current user have privilege for tablespace
+
+ has_type_privilege(user,
+ type,
+ privilege)
+
+ boolean
+ does user have privilege for type
+
+
+ has_type_privilege(type,
+ privilege)
+
+ boolean
+ does current user have privilege for type
+ pg_has_role(user,
role,
@@ -15955,6 +15970,9 @@ SET search_path TO schema> , schema>, ..
has_tablespace_privilege
+
+ has_type_privilege
+ pg_has_role
@@ -16109,6 +16127,18 @@ SELECT has_function_privilege('joeuser', 'myfunc(int, text)', 'execute');
CREATE.
+
+ has_type_privilege checks whether a user
+ can access a type in a particular way.
+ Its argument possibilities
+ are analogous to has_table_privilege.
+ When specifying a type by a text string rather than by OID,
+ the allowed input is the same as for the regtype> data type
+ (see ).
+ The desired access privilege type must evaluate to
+ USAGE.
+
+
pg_has_role checks whether a user
can access a role in a particular way.
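As a usage sketch (the role and type names are placeholders, not from the
patch), the two forms added by this commit look like:

    -- does joeuser have USAGE on mytype?
    SELECT has_type_privilege('joeuser', 'mytype', 'USAGE');
    -- same check for the current user
    SELECT has_type_privilege('mytype', 'USAGE');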
--
cgit v1.2.3
From fdc9186f7ed1ead827509584f3b763f8dc332c43 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 26 Sep 2016 14:52:44 -0400
Subject: Replace the built-in GIN array opclasses with a single polymorphic
opclass.
We had thirty different GIN array opclasses sharing the same operators and
support functions. That still didn't cover all the built-in types, nor
did it cover arrays of extension-added types. What we want is a single
polymorphic opclass for "anyarray". There were two missing features needed
to make this possible:
1. We have to be able to declare the index storage type as ANYELEMENT
when the opclass is declared to index ANYARRAY. This just takes a few
more lines in index_create(). Although this currently seems of use only
for GIN, there's no reason to make index_create() restrict it to that.
2. We have to be able to identify the proper GIN compare function for
the index storage type. This patch proceeds by making the compare function
optional in GIN opclass definitions, and specifying that the default btree
comparison function for the index storage type will be looked up when the
opclass omits it. Again, that seems pretty generically useful.
Since the comparison function lookup is done in initGinState(), making
use of the second feature adds an additional cache lookup to GIN index
access setup. It seems unlikely that that would be very noticeable given
the other costs involved, but maybe at some point we should consider
making GinState data persist longer than it now does --- we could keep it
in the index relcache entry, perhaps.
Rather fortuitously, we don't seem to need to do anything to get this
change to play nice with dump/reload or pg_upgrade scenarios: the new
opclass definition is automatically selected to replace existing index
definitions, and the on-disk data remains compatible. Also, if a user has
created a custom opclass definition for a non-builtin type, this doesn't
break that, since CREATE INDEX will prefer an exact match to opcintype
over a match to ANYARRAY. However, if there's anyone out there with
handwritten DDL that explicitly specifies _bool_ops or one of the other
replaced opclass names, they'll need to adjust that.
Tom Lane, reviewed by Enrique Meneses
Discussion: <14436.1470940379@sss.pgh.pa.us>
---
doc/src/sgml/gin.sgml | 349 ++++-------------------------------
doc/src/sgml/indices.sgml | 5 +-
doc/src/sgml/ref/create_opclass.sgml | 5 +
doc/src/sgml/xindex.sgml | 2 +-
src/backend/access/gin/ginutil.c | 32 +++-
src/backend/access/gin/ginvalidate.c | 2 +-
src/backend/catalog/index.c | 19 +-
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_amop.h | 3 +-
src/include/catalog/pg_amproc.h | 154 +---------------
src/include/catalog/pg_opclass.h | 31 +---
11 files changed, 101 insertions(+), 503 deletions(-)
(limited to 'doc/src')
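As a minimal sketch of the effect (table and query are hypothetical): any
array column can now be indexed through the single polymorphic opclass, with
no per-element-type opclass needed:

    CREATE TABLE docs (tags text[]);
    CREATE INDEX docs_tags_idx ON docs USING gin (tags);   -- selects array_ops
    SELECT * FROM docs WHERE tags @> ARRAY['postgres'];    -- indexable operator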
diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml
index 05d92eb975..7c2321ec3c 100644
--- a/doc/src/sgml/gin.sgml
+++ b/doc/src/sgml/gin.sgml
@@ -85,298 +85,8 @@
- _abstime_ops>
- abstime[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _bit_ops>
- bit[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _bool_ops>
- boolean[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _bpchar_ops>
- character[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _bytea_ops>
- bytea[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _char_ops>
- "char"[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _cidr_ops>
- cidr[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _date_ops>
- date[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _float4_ops>
- float4[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _float8_ops>
- float8[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _inet_ops>
- inet[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _int2_ops>
- smallint[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _int4_ops>
- integer[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _int8_ops>
- bigint[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _interval_ops>
- interval[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _macaddr_ops>
- macaddr[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _money_ops>
- money[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _name_ops>
- name[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _numeric_ops>
- numeric[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _oid_ops>
- oid[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _oidvector_ops>
- oidvector[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _reltime_ops>
- reltime[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _text_ops>
- text[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _time_ops>
- time[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _timestamp_ops>
- timestamp[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _timestamptz_ops>
- timestamp with time zone[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _timetz_ops>
- time with time zone[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _tinterval_ops>
- tinterval[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _varbit_ops>
- bit varying[]>
-
- &&>
- <@>
- =>
- @>>
-
-
-
- _varchar_ops>
- character varying[]>
+ array_ops>
+ anyarray>&&>
<@>
@@ -441,22 +151,10 @@
- There are three methods that an operator class for
+ There are two methods that an operator class for
GIN must provide:
-
-
- int compare(Datum a, Datum b)>
-
-
- Compares two keys (not indexed items!) and returns an integer less than
- zero, zero, or greater than zero, indicating whether the first key is
- less than, equal to, or greater than the second. Null keys are never
- passed to this function.
-
-
-
-
+ Datum *extractValue(Datum itemValue, int32 *nkeys,
bool **nullFlags)>
@@ -645,7 +343,38 @@
+
+
+
+ In addition, GIN must have a way to sort the key values stored in the index.
+ The operator class can define the sort ordering by specifying a comparison
+ method:
+
+
+ int compare(Datum a, Datum b)>
+
+
+ Compares two keys (not indexed items!) and returns an integer less than
+ zero, zero, or greater than zero, indicating whether the first key is
+ less than, equal to, or greater than the second. Null keys are never
+ passed to this function.
+
+
+
+
+
+ Alternatively, if the operator class does not provide a compare>
+ method, GIN will look up the default btree operator class for the index
+ key data type, and use its comparison function. It is recommended to
+ specify the comparison function in a GIN operator class that is meant for
+ just one data type, as looking up the btree operator class costs a few
+ cycles. However, polymorphic GIN operator classes (such
+ as array_ops>) typically cannot specify a single comparison
+ function.
+
+
+
Optionally, an operator class for GIN can supply the
following method:
@@ -900,11 +629,9 @@
Examples
- The PostgreSQL source distribution includes
- GIN operator classes for tsvector> and
- for one-dimensional arrays of all internal types. Prefix searching in
- tsvector> is implemented using the GIN> partial match
- feature.
+ The core PostgreSQL> distribution
+ includes the GIN operator classes previously shown in
+ .
The following contrib> modules also contain
GIN operator classes:
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index 46f8e55ca9..271c135519 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -315,9 +315,8 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10;
operators with which a GIN index can be used vary depending on the
indexing strategy.
As an example, the standard distribution of
- PostgreSQL includes GIN operator classes
- for one-dimensional arrays, which support indexed
- queries using these operators:
+ PostgreSQL includes a GIN operator class
+ for arrays, which supports indexed queries using these operators:
<@
diff --git a/doc/src/sgml/ref/create_opclass.sgml b/doc/src/sgml/ref/create_opclass.sgml
index 7b9d55d38d..829d8f2fff 100644
--- a/doc/src/sgml/ref/create_opclass.sgml
+++ b/doc/src/sgml/ref/create_opclass.sgml
@@ -235,6 +235,11 @@ CREATE OPERATOR CLASS name [ DEFAUL
(currently GiST, GIN and BRIN) allow it to be different. The
STORAGE> clause must be omitted unless the index
method allows a different type to be used.
+ If the column data_type> is specified
+ as anyarray>, the storage_type>
+ can be declared as anyelement> to indicate that the index
+ entries are members of the element type belonging to the actual array
+ type that each particular index is created for.
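As a sketch of DDL using this clause, loosely reconstructed from the
built-in array_ops definition above; the operator class name is hypothetical
and the support-function signatures are recalled from the catalogs, so
verify them against pg_proc before relying on this:

    CREATE OPERATOR CLASS my_array_ops
        FOR TYPE anyarray USING gin AS
            OPERATOR 1 &&,
            OPERATOR 2 @>,
            OPERATOR 3 <@,
            OPERATOR 4 =,
            FUNCTION 2 ginarrayextract(anyarray, internal, internal),
            FUNCTION 3 ginqueryarrayextract(anyarray, internal, smallint,
                                            internal, internal, internal, internal),
            FUNCTION 4 ginarrayconsistent(internal, smallint, anyarray, integer,
                                          internal, internal, internal, internal),
            STORAGE anyelement;   -- index entries are the array's element type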
diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml
index f0b711e2ce..333a36c456 100644
--- a/doc/src/sgml/xindex.sgml
+++ b/doc/src/sgml/xindex.sgml
@@ -288,7 +288,7 @@
have a fixed set of strategies either. Instead the support routines of
each operator class interpret the strategy numbers according to the
operator class's definition. As an example, the strategy numbers used by
- the built-in operator classes for arrays are shown in
+ the built-in operator class for arrays are shown in
.
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index d9146488c4..f07eedc0fa 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -22,7 +22,9 @@
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
+#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
+#include "utils/typcache.h"
/*
@@ -105,9 +107,33 @@ initGinState(GinState *state, Relation index)
origTupdesc->attrs[i]->attcollation);
}
- fmgr_info_copy(&(state->compareFn[i]),
- index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
- CurrentMemoryContext);
+ /*
+ * If the compare proc isn't specified in the opclass definition, look
+ * up the index key type's default btree comparator.
+ */
+ if (index_getprocid(index, i + 1, GIN_COMPARE_PROC) != InvalidOid)
+ {
+ fmgr_info_copy(&(state->compareFn[i]),
+ index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
+ CurrentMemoryContext);
+ }
+ else
+ {
+ TypeCacheEntry *typentry;
+
+ typentry = lookup_type_cache(origTupdesc->attrs[i]->atttypid,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(origTupdesc->attrs[i]->atttypid))));
+ fmgr_info_copy(&(state->compareFn[i]),
+ &(typentry->cmp_proc_finfo),
+ CurrentMemoryContext);
+ }
+
+ /* Opclass must always provide extract procs */
fmgr_info_copy(&(state->extractValueFn[i]),
index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
CurrentMemoryContext);
diff --git a/src/backend/access/gin/ginvalidate.c b/src/backend/access/gin/ginvalidate.c
index 032508387d..02196e0f12 100644
--- a/src/backend/access/gin/ginvalidate.c
+++ b/src/backend/access/gin/ginvalidate.c
@@ -237,7 +237,7 @@ ginvalidate(Oid opclassoid)
if (opclassgroup &&
(opclassgroup->functionset & (((uint64) 1) << i)) != 0)
continue; /* got it */
- if (i == GIN_COMPARE_PARTIAL_PROC)
+ if (i == GIN_COMPARE_PROC || i == GIN_COMPARE_PARTIAL_PROC)
continue; /* optional method */
if (i == GIN_CONSISTENT_PROC || i == GIN_TRICONSISTENT_PROC)
continue; /* don't need both, see check below loop */
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index b0b43cf02d..08b646d8f3 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -437,11 +437,28 @@ ConstructTupleDescriptor(Relation heapRelation,
keyType = opclassTup->opckeytype;
else
keyType = amroutine->amkeytype;
+
+ /*
+ * If keytype is specified as ANYELEMENT, and opcintype is ANYARRAY,
+ * then the attribute type must be an array (else it'd not have
+ * matched this opclass); use its element type.
+ */
+ if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
+ {
+ keyType = get_base_element_type(to->atttypid);
+ if (!OidIsValid(keyType))
+ elog(ERROR, "could not get element type of array type %u",
+ to->atttypid);
+ }
+
ReleaseSysCache(tuple);
+ /*
+ * If a key type different from the heap value is specified, update
+ * the type-related fields in the index tupdesc.
+ */
if (OidIsValid(keyType) && keyType != to->atttypid)
{
- /* index value and heap value have different types */
tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for type %u", keyType);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index ef691c5721..3fdd0d6129 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201609131
+#define CATALOG_VERSION_NO 201609261
#endif
diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h
index 917ed46b71..15b629029f 100644
--- a/src/include/catalog/pg_amop.h
+++ b/src/include/catalog/pg_amop.h
@@ -673,8 +673,7 @@ DATA(insert ( 2595 718 718 14 s 2864 783 0 ));
DATA(insert ( 2595 718 600 15 o 3291 783 1970 ));
/*
- * gin array_ops (these anyarray operators are used with all the opclasses
- * of the family)
+ * gin array_ops
*/
DATA(insert ( 2745 2277 2277 1 s 2750 2742 0 ));
DATA(insert ( 2745 2277 2277 2 s 2751 2742 0 ));
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 0cbb416392..1b654d5be4 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -255,156 +255,10 @@ DATA(insert ( 3550 869 869 9 3573 ));
/* gin */
-DATA(insert ( 2745 1007 1007 1 351 ));
-DATA(insert ( 2745 1007 1007 2 2743 ));
-DATA(insert ( 2745 1007 1007 3 2774 ));
-DATA(insert ( 2745 1007 1007 4 2744 ));
-DATA(insert ( 2745 1007 1007 6 3920 ));
-DATA(insert ( 2745 1009 1009 1 360 ));
-DATA(insert ( 2745 1009 1009 2 2743 ));
-DATA(insert ( 2745 1009 1009 3 2774 ));
-DATA(insert ( 2745 1009 1009 4 2744 ));
-DATA(insert ( 2745 1009 1009 6 3920 ));
-DATA(insert ( 2745 1015 1015 1 360 ));
-DATA(insert ( 2745 1015 1015 2 2743 ));
-DATA(insert ( 2745 1015 1015 3 2774 ));
-DATA(insert ( 2745 1015 1015 4 2744 ));
-DATA(insert ( 2745 1015 1015 6 3920 ));
-DATA(insert ( 2745 1023 1023 1 357 ));
-DATA(insert ( 2745 1023 1023 2 2743 ));
-DATA(insert ( 2745 1023 1023 3 2774 ));
-DATA(insert ( 2745 1023 1023 4 2744 ));
-DATA(insert ( 2745 1023 1023 6 3920 ));
-DATA(insert ( 2745 1561 1561 1 1596 ));
-DATA(insert ( 2745 1561 1561 2 2743 ));
-DATA(insert ( 2745 1561 1561 3 2774 ));
-DATA(insert ( 2745 1561 1561 4 2744 ));
-DATA(insert ( 2745 1561 1561 6 3920 ));
-DATA(insert ( 2745 1000 1000 1 1693 ));
-DATA(insert ( 2745 1000 1000 2 2743 ));
-DATA(insert ( 2745 1000 1000 3 2774 ));
-DATA(insert ( 2745 1000 1000 4 2744 ));
-DATA(insert ( 2745 1000 1000 6 3920 ));
-DATA(insert ( 2745 1014 1014 1 1078 ));
-DATA(insert ( 2745 1014 1014 2 2743 ));
-DATA(insert ( 2745 1014 1014 3 2774 ));
-DATA(insert ( 2745 1014 1014 4 2744 ));
-DATA(insert ( 2745 1014 1014 6 3920 ));
-DATA(insert ( 2745 1001 1001 1 1954 ));
-DATA(insert ( 2745 1001 1001 2 2743 ));
-DATA(insert ( 2745 1001 1001 3 2774 ));
-DATA(insert ( 2745 1001 1001 4 2744 ));
-DATA(insert ( 2745 1001 1001 6 3920 ));
-DATA(insert ( 2745 1002 1002 1 358 ));
-DATA(insert ( 2745 1002 1002 2 2743 ));
-DATA(insert ( 2745 1002 1002 3 2774 ));
-DATA(insert ( 2745 1002 1002 4 2744 ));
-DATA(insert ( 2745 1002 1002 6 3920 ));
-DATA(insert ( 2745 1182 1182 1 1092 ));
-DATA(insert ( 2745 1182 1182 2 2743 ));
-DATA(insert ( 2745 1182 1182 3 2774 ));
-DATA(insert ( 2745 1182 1182 4 2744 ));
-DATA(insert ( 2745 1182 1182 6 3920 ));
-DATA(insert ( 2745 1021 1021 1 354 ));
-DATA(insert ( 2745 1021 1021 2 2743 ));
-DATA(insert ( 2745 1021 1021 3 2774 ));
-DATA(insert ( 2745 1021 1021 4 2744 ));
-DATA(insert ( 2745 1021 1021 6 3920 ));
-DATA(insert ( 2745 1022 1022 1 355 ));
-DATA(insert ( 2745 1022 1022 2 2743 ));
-DATA(insert ( 2745 1022 1022 3 2774 ));
-DATA(insert ( 2745 1022 1022 4 2744 ));
-DATA(insert ( 2745 1022 1022 6 3920 ));
-DATA(insert ( 2745 1041 1041 1 926 ));
-DATA(insert ( 2745 1041 1041 2 2743 ));
-DATA(insert ( 2745 1041 1041 3 2774 ));
-DATA(insert ( 2745 1041 1041 4 2744 ));
-DATA(insert ( 2745 1041 1041 6 3920 ));
-DATA(insert ( 2745 651 651 1 926 ));
-DATA(insert ( 2745 651 651 2 2743 ));
-DATA(insert ( 2745 651 651 3 2774 ));
-DATA(insert ( 2745 651 651 4 2744 ));
-DATA(insert ( 2745 651 651 6 3920 ));
-DATA(insert ( 2745 1005 1005 1 350 ));
-DATA(insert ( 2745 1005 1005 2 2743 ));
-DATA(insert ( 2745 1005 1005 3 2774 ));
-DATA(insert ( 2745 1005 1005 4 2744 ));
-DATA(insert ( 2745 1005 1005 6 3920 ));
-DATA(insert ( 2745 1016 1016 1 842 ));
-DATA(insert ( 2745 1016 1016 2 2743 ));
-DATA(insert ( 2745 1016 1016 3 2774 ));
-DATA(insert ( 2745 1016 1016 4 2744 ));
-DATA(insert ( 2745 1016 1016 6 3920 ));
-DATA(insert ( 2745 1187 1187 1 1315 ));
-DATA(insert ( 2745 1187 1187 2 2743 ));
-DATA(insert ( 2745 1187 1187 3 2774 ));
-DATA(insert ( 2745 1187 1187 4 2744 ));
-DATA(insert ( 2745 1187 1187 6 3920 ));
-DATA(insert ( 2745 1040 1040 1 836 ));
-DATA(insert ( 2745 1040 1040 2 2743 ));
-DATA(insert ( 2745 1040 1040 3 2774 ));
-DATA(insert ( 2745 1040 1040 4 2744 ));
-DATA(insert ( 2745 1040 1040 6 3920 ));
-DATA(insert ( 2745 1003 1003 1 359 ));
-DATA(insert ( 2745 1003 1003 2 2743 ));
-DATA(insert ( 2745 1003 1003 3 2774 ));
-DATA(insert ( 2745 1003 1003 4 2744 ));
-DATA(insert ( 2745 1003 1003 6 3920 ));
-DATA(insert ( 2745 1231 1231 1 1769 ));
-DATA(insert ( 2745 1231 1231 2 2743 ));
-DATA(insert ( 2745 1231 1231 3 2774 ));
-DATA(insert ( 2745 1231 1231 4 2744 ));
-DATA(insert ( 2745 1231 1231 6 3920 ));
-DATA(insert ( 2745 1028 1028 1 356 ));
-DATA(insert ( 2745 1028 1028 2 2743 ));
-DATA(insert ( 2745 1028 1028 3 2774 ));
-DATA(insert ( 2745 1028 1028 4 2744 ));
-DATA(insert ( 2745 1028 1028 6 3920 ));
-DATA(insert ( 2745 1013 1013 1 404 ));
-DATA(insert ( 2745 1013 1013 2 2743 ));
-DATA(insert ( 2745 1013 1013 3 2774 ));
-DATA(insert ( 2745 1013 1013 4 2744 ));
-DATA(insert ( 2745 1013 1013 6 3920 ));
-DATA(insert ( 2745 1183 1183 1 1107 ));
-DATA(insert ( 2745 1183 1183 2 2743 ));
-DATA(insert ( 2745 1183 1183 3 2774 ));
-DATA(insert ( 2745 1183 1183 4 2744 ));
-DATA(insert ( 2745 1183 1183 6 3920 ));
-DATA(insert ( 2745 1185 1185 1 1314 ));
-DATA(insert ( 2745 1185 1185 2 2743 ));
-DATA(insert ( 2745 1185 1185 3 2774 ));
-DATA(insert ( 2745 1185 1185 4 2744 ));
-DATA(insert ( 2745 1185 1185 6 3920 ));
-DATA(insert ( 2745 1270 1270 1 1358 ));
-DATA(insert ( 2745 1270 1270 2 2743 ));
-DATA(insert ( 2745 1270 1270 3 2774 ));
-DATA(insert ( 2745 1270 1270 4 2744 ));
-DATA(insert ( 2745 1270 1270 6 3920 ));
-DATA(insert ( 2745 1563 1563 1 1672 ));
-DATA(insert ( 2745 1563 1563 2 2743 ));
-DATA(insert ( 2745 1563 1563 3 2774 ));
-DATA(insert ( 2745 1563 1563 4 2744 ));
-DATA(insert ( 2745 1563 1563 6 3920 ));
-DATA(insert ( 2745 1115 1115 1 2045 ));
-DATA(insert ( 2745 1115 1115 2 2743 ));
-DATA(insert ( 2745 1115 1115 3 2774 ));
-DATA(insert ( 2745 1115 1115 4 2744 ));
-DATA(insert ( 2745 1115 1115 6 3920 ));
-DATA(insert ( 2745 791 791 1 377 ));
-DATA(insert ( 2745 791 791 2 2743 ));
-DATA(insert ( 2745 791 791 3 2774 ));
-DATA(insert ( 2745 791 791 4 2744 ));
-DATA(insert ( 2745 791 791 6 3920 ));
-DATA(insert ( 2745 1024 1024 1 380 ));
-DATA(insert ( 2745 1024 1024 2 2743 ));
-DATA(insert ( 2745 1024 1024 3 2774 ));
-DATA(insert ( 2745 1024 1024 4 2744 ));
-DATA(insert ( 2745 1024 1024 6 3920 ));
-DATA(insert ( 2745 1025 1025 1 381 ));
-DATA(insert ( 2745 1025 1025 2 2743 ));
-DATA(insert ( 2745 1025 1025 3 2774 ));
-DATA(insert ( 2745 1025 1025 4 2744 ));
-DATA(insert ( 2745 1025 1025 6 3920 ));
+DATA(insert ( 2745 2277 2277 2 2743 ));
+DATA(insert ( 2745 2277 2277 3 2774 ));
+DATA(insert ( 2745 2277 2277 4 2744 ));
+DATA(insert ( 2745 2277 2277 6 3920 ));
DATA(insert ( 3659 3614 3614 1 3724 ));
DATA(insert ( 3659 3614 3614 2 3656 ));
DATA(insert ( 3659 3614 3614 3 3657 ));
diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h
index f40b06112b..5900cdc5b0 100644
--- a/src/include/catalog/pg_opclass.h
+++ b/src/include/catalog/pg_opclass.h
@@ -184,36 +184,7 @@ DATA(insert ( 783 box_ops PGNSP PGUID 2593 603 t 0 ));
DATA(insert ( 783 point_ops PGNSP PGUID 1029 600 t 603 ));
DATA(insert ( 783 poly_ops PGNSP PGUID 2594 604 t 603 ));
DATA(insert ( 783 circle_ops PGNSP PGUID 2595 718 t 603 ));
-DATA(insert ( 2742 _int4_ops PGNSP PGUID 2745 1007 t 23 ));
-DATA(insert ( 2742 _text_ops PGNSP PGUID 2745 1009 t 25 ));
-DATA(insert ( 2742 _abstime_ops PGNSP PGUID 2745 1023 t 702 ));
-DATA(insert ( 2742 _bit_ops PGNSP PGUID 2745 1561 t 1560 ));
-DATA(insert ( 2742 _bool_ops PGNSP PGUID 2745 1000 t 16 ));
-DATA(insert ( 2742 _bpchar_ops PGNSP PGUID 2745 1014 t 1042 ));
-DATA(insert ( 2742 _bytea_ops PGNSP PGUID 2745 1001 t 17 ));
-DATA(insert ( 2742 _char_ops PGNSP PGUID 2745 1002 t 18 ));
-DATA(insert ( 2742 _cidr_ops PGNSP PGUID 2745 651 t 650 ));
-DATA(insert ( 2742 _date_ops PGNSP PGUID 2745 1182 t 1082 ));
-DATA(insert ( 2742 _float4_ops PGNSP PGUID 2745 1021 t 700 ));
-DATA(insert ( 2742 _float8_ops PGNSP PGUID 2745 1022 t 701 ));
-DATA(insert ( 2742 _inet_ops PGNSP PGUID 2745 1041 t 869 ));
-DATA(insert ( 2742 _int2_ops PGNSP PGUID 2745 1005 t 21 ));
-DATA(insert ( 2742 _int8_ops PGNSP PGUID 2745 1016 t 20 ));
-DATA(insert ( 2742 _interval_ops PGNSP PGUID 2745 1187 t 1186 ));
-DATA(insert ( 2742 _macaddr_ops PGNSP PGUID 2745 1040 t 829 ));
-DATA(insert ( 2742 _name_ops PGNSP PGUID 2745 1003 t 19 ));
-DATA(insert ( 2742 _numeric_ops PGNSP PGUID 2745 1231 t 1700 ));
-DATA(insert ( 2742 _oid_ops PGNSP PGUID 2745 1028 t 26 ));
-DATA(insert ( 2742 _oidvector_ops PGNSP PGUID 2745 1013 t 30 ));
-DATA(insert ( 2742 _time_ops PGNSP PGUID 2745 1183 t 1083 ));
-DATA(insert ( 2742 _timestamptz_ops PGNSP PGUID 2745 1185 t 1184 ));
-DATA(insert ( 2742 _timetz_ops PGNSP PGUID 2745 1270 t 1266 ));
-DATA(insert ( 2742 _varbit_ops PGNSP PGUID 2745 1563 t 1562 ));
-DATA(insert ( 2742 _varchar_ops PGNSP PGUID 2745 1015 t 1043 ));
-DATA(insert ( 2742 _timestamp_ops PGNSP PGUID 2745 1115 t 1114 ));
-DATA(insert ( 2742 _money_ops PGNSP PGUID 2745 791 t 790 ));
-DATA(insert ( 2742 _reltime_ops PGNSP PGUID 2745 1024 t 703 ));
-DATA(insert ( 2742 _tinterval_ops PGNSP PGUID 2745 1025 t 704 ));
+DATA(insert ( 2742 array_ops PGNSP PGUID 2745 2277 t 2283 ));
DATA(insert ( 403 uuid_ops PGNSP PGUID 2968 2950 t 0 ));
DATA(insert ( 405 uuid_ops PGNSP PGUID 2969 2950 t 0 ));
DATA(insert ( 403 pg_lsn_ops PGNSP PGUID 3253 3220 t 0 ));
--
cgit v1.2.3
From babe05bc2b781eb3eb84a18d7010d08277e2e399 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Wed, 28 Sep 2016 12:22:44 +0300
Subject: Turn password_encryption GUC into an enum.
This makes the parameter easier to extend, to support password-based
authentication protocols other than MD5. (SCRAM is being worked on.)

The GUC still accepts on/off as aliases for "md5" and "plain", although
we may want to remove those once we actually add support for another
password hash type.
Michael Paquier, reviewed by David Steele, with some further edits by me.
Discussion:
---
doc/src/sgml/config.sgml | 17 ++++++-----
src/backend/commands/user.c | 18 +++++------
src/backend/utils/misc/guc.c | 44 +++++++++++++++++++--------
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/include/commands/user.h | 15 +++++++--
5 files changed, 62 insertions(+), 34 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a848a7edd1..e826c19698 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1163,21 +1163,22 @@ include_dir 'conf.d'
- password_encryption (boolean)
+ password_encryption (enum)
password_encryption> configuration parameter
- When a password is specified in or
-
- without writing either ENCRYPTED> or
- UNENCRYPTED>, this parameter determines whether the
- password is to be encrypted. The default is on>
- (encrypt the password).
+ When a password is specified in or
+ without writing either ENCRYPTED>
+ or UNENCRYPTED>, this parameter determines whether the
+ password is to be encrypted. The default value is md5>, which
+ stores the password as an MD5 hash. Setting this to plain> stores
+ it in plaintext. on> and off> are also accepted, as
+ aliases for md5> and plain>, respectively.
+
diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c
index 4027c89b14..adc6b99b21 100644
--- a/src/backend/commands/user.c
+++ b/src/backend/commands/user.c
@@ -44,7 +44,7 @@ Oid binary_upgrade_next_pg_authid_oid = InvalidOid;
/* GUC parameter */
-extern bool Password_encryption;
+int Password_encryption = PASSWORD_TYPE_MD5;
/* Hook to check passwords in CreateRole() and AlterRole() */
check_password_hook_type check_password_hook = NULL;
@@ -80,7 +80,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt)
ListCell *item;
ListCell *option;
char *password = NULL; /* user password */
- bool encrypt_password = Password_encryption; /* encrypt password? */
+ int password_type = Password_encryption;
char encrypted_password[MD5_PASSWD_LEN + 1];
bool issuper = false; /* Make the user a superuser? */
bool inherit = true; /* Auto inherit privileges? */
@@ -140,9 +140,9 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt)
parser_errposition(pstate, defel->location)));
dpassword = defel;
if (strcmp(defel->defname, "encryptedPassword") == 0)
- encrypt_password = true;
+ password_type = PASSWORD_TYPE_MD5;
else if (strcmp(defel->defname, "unencryptedPassword") == 0)
- encrypt_password = false;
+ password_type = PASSWORD_TYPE_PLAINTEXT;
}
else if (strcmp(defel->defname, "sysid") == 0)
{
@@ -393,7 +393,7 @@ CreateRole(ParseState *pstate, CreateRoleStmt *stmt)
if (password)
{
- if (!encrypt_password || isMD5(password))
+ if (password_type == PASSWORD_TYPE_PLAINTEXT || isMD5(password))
new_record[Anum_pg_authid_rolpassword - 1] =
CStringGetTextDatum(password);
else
@@ -505,7 +505,7 @@ AlterRole(AlterRoleStmt *stmt)
ListCell *option;
char *rolename = NULL;
char *password = NULL; /* user password */
- bool encrypt_password = Password_encryption; /* encrypt password? */
+ int password_type = Password_encryption;
char encrypted_password[MD5_PASSWD_LEN + 1];
int issuper = -1; /* Make the user a superuser? */
int inherit = -1; /* Auto inherit privileges? */
@@ -550,9 +550,9 @@ AlterRole(AlterRoleStmt *stmt)
errmsg("conflicting or redundant options")));
dpassword = defel;
if (strcmp(defel->defname, "encryptedPassword") == 0)
- encrypt_password = true;
+ password_type = PASSWORD_TYPE_MD5;
else if (strcmp(defel->defname, "unencryptedPassword") == 0)
- encrypt_password = false;
+ password_type = PASSWORD_TYPE_PLAINTEXT;
}
else if (strcmp(defel->defname, "superuser") == 0)
{
@@ -804,7 +804,7 @@ AlterRole(AlterRoleStmt *stmt)
/* password */
if (password)
{
- if (!encrypt_password || isMD5(password))
+ if (password_type == PASSWORD_TYPE_PLAINTEXT || isMD5(password))
new_record[Anum_pg_authid_rolpassword - 1] =
CStringGetTextDatum(password);
else
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index ce4eef950b..cced814d6a 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -34,6 +34,7 @@
#include "catalog/namespace.h"
#include "commands/async.h"
#include "commands/prepare.h"
+#include "commands/user.h"
#include "commands/vacuum.h"
#include "commands/variable.h"
#include "commands/trigger.h"
@@ -393,6 +394,24 @@ static const struct config_enum_entry force_parallel_mode_options[] = {
{NULL, 0, false}
};
+/*
+ * password_encryption used to be a boolean, so accept all the likely
+ * variants of "on" and "off", too.
+ */
+static const struct config_enum_entry password_encryption_options[] = {
+ {"plain", PASSWORD_TYPE_PLAINTEXT, false},
+ {"md5", PASSWORD_TYPE_MD5, false},
+ {"off", PASSWORD_TYPE_PLAINTEXT, false},
+ {"on", PASSWORD_TYPE_MD5, false},
+ {"true", PASSWORD_TYPE_MD5, true},
+ {"false", PASSWORD_TYPE_PLAINTEXT, true},
+ {"yes", PASSWORD_TYPE_MD5, true},
+ {"no", PASSWORD_TYPE_PLAINTEXT, true},
+ {"1", PASSWORD_TYPE_MD5, true},
+ {"0", PASSWORD_TYPE_PLAINTEXT, true},
+ {NULL, 0, false}
+};
+
/*
* Options for enum values stored in other modules
*/
@@ -423,8 +442,6 @@ bool check_function_bodies = true;
bool default_with_oids = false;
bool SQL_inheritance = true;
-bool Password_encryption = true;
-
int log_min_error_statement = ERROR;
int log_min_messages = WARNING;
int client_min_messages = NOTICE;
@@ -1313,17 +1330,6 @@ static struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
- {
- {"password_encryption", PGC_USERSET, CONN_AUTH_SECURITY,
- gettext_noop("Encrypt passwords."),
- gettext_noop("When a password is specified in CREATE USER or "
- "ALTER USER without writing either ENCRYPTED or UNENCRYPTED, "
- "this parameter determines whether the password is to be encrypted.")
- },
- &Password_encryption,
- true,
- NULL, NULL, NULL
- },
{
{"transform_null_equals", PGC_USERSET, COMPAT_OPTIONS_CLIENT,
gettext_noop("Treats \"expr=NULL\" as \"expr IS NULL\"."),
@@ -3810,6 +3816,18 @@ static struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL
},
+ {
+ {"password_encryption", PGC_USERSET, CONN_AUTH_SECURITY,
+ gettext_noop("Encrypt passwords."),
+ gettext_noop("When a password is specified in CREATE USER or "
+ "ALTER USER without writing either ENCRYPTED or UNENCRYPTED, "
+ "this parameter determines whether the password is to be encrypted.")
+ },
+ &Password_encryption,
+ PASSWORD_TYPE_MD5, password_encryption_options,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b1c3aea9ee..05b1373594 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -85,7 +85,7 @@
#ssl_key_file = 'server.key' # (change requires restart)
#ssl_ca_file = '' # (change requires restart)
#ssl_crl_file = '' # (change requires restart)
-#password_encryption = on
+#password_encryption = md5 # md5 or plain
#db_user_namespace = off
#row_security = on
diff --git a/src/include/commands/user.h b/src/include/commands/user.h
index 1f0cfcc86f..102c2a5861 100644
--- a/src/include/commands/user.h
+++ b/src/include/commands/user.h
@@ -16,10 +16,19 @@
#include "parser/parse_node.h"
-/* Hook to check passwords in CreateRole() and AlterRole() */
-#define PASSWORD_TYPE_PLAINTEXT 0
-#define PASSWORD_TYPE_MD5 1
+/*
+ * Types of password, for Password_encryption GUC and the password_type
+ * argument of the check-password hook.
+ */
+typedef enum PasswordType
+{
+ PASSWORD_TYPE_PLAINTEXT = 0,
+ PASSWORD_TYPE_MD5
+} PasswordType;
+extern int Password_encryption; /* GUC */
+
+/* Hook to check passwords in CreateRole() and AlterRole() */
typedef void (*check_password_hook_type) (const char *username, const char *password, int password_type, Datum validuntil_time, bool validuntil_null);
extern PGDLLIMPORT check_password_hook_type check_password_hook;
--
cgit v1.2.3
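A minimal illustration of the behavior introduced by the patch above; this
is a hedged sketch, not part of the patch, and the role names are
hypothetical:

    SET password_encryption = 'md5';
    CREATE ROLE alice LOGIN PASSWORD 'secret';  -- rolpassword stores an MD5 hash
    SET password_encryption = 'plain';
    CREATE ROLE bob LOGIN PASSWORD 'secret';    -- rolpassword stores the cleartext
    SELECT rolname, rolpassword FROM pg_authid
      WHERE rolname IN ('alice', 'bob');        -- requires superuser

Per the enum table added to guc.c, the old boolean spellings still work:
'on' maps to md5 and 'off' to plain.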
From d3cd36a133d96ad5578b6c10279b55fd5b538093 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 28 Sep 2016 14:36:04 -0400
Subject: Make to_timestamp() and to_date() range-check fields of their input.
Historically, something like to_date('2009-06-40','YYYY-MM-DD') would
return '2009-07-10' because there was no prohibition on out-of-range
month or day numbers. This has been widely panned, and it also turns
out that Oracle throws an error in such cases. Since these functions
are nominally Oracle-compatibility features, let's change that.
There's no particular restriction on year (modulo the fact that the
scanner may not believe that more than 4 digits are year digits,
a matter to be addressed separately if at all). But we now check month,
day, hour, minute, second, and fractional-second fields, as well as
day-of-year and second-of-day fields if those are used.
Currently, no checks are made on ISO-8601-style week numbers or day
numbers; it's not very clear what the appropriate rules would be there,
and they're probably so little used that it's not worth sweating over.
Artur Zakirov, reviewed by Amul Sul, further adjustments by me
Discussion: <1873520224.1784572.1465833145330.JavaMail.yahoo@mail.yahoo.com>
See-Also: <57786490.9010201@wars-nicht.de>
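A hedged sketch of the behavior change (not part of the patch), using the
example from the message above; output format follows the regression tests:

    SELECT to_date('2009-06-40', 'YYYY-MM-DD');
    ERROR:  date/time field value out of range: "2009-06-40"

    SELECT to_date('2009-06-30', 'YYYY-MM-DD');  -- still ok
       to_date
    ------------
     06-30-2009
    (1 row)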
---
doc/src/sgml/func.sgml | 85 +++++++++++++++++-------------
src/backend/utils/adt/formatting.c | 94 +++++++++++++++++++++++----------
src/test/regress/expected/horology.out | 96 +++++++++++++++++++++++++++++++++-
src/test/regress/sql/horology.sql | 30 ++++++++++-
4 files changed, 239 insertions(+), 66 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 67de029c6a..a58835082b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -5832,6 +5832,17 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
+
+
+ to_timestamp and to_date
+ exist to handle input formats that cannot be converted by
+ simple casting. For most standard date/time formats, simply casting the
+ source string to the required data type works, and is much easier.
+ Similarly, to_number> is unnecessary for standard numeric
+ representations.
+
+
+
In a to_char> output template string, there are certain
patterns that are recognized and replaced with appropriately-formatted
@@ -6038,7 +6049,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
Q
- quarter (ignored by to_date> and to_timestamp>)
+ quarter
RM
@@ -6156,20 +6167,6 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
-
-
- to_timestamp and to_date
- exist to handle input formats that cannot be converted by
- simple casting. These functions interpret input liberally,
- with minimal error checking. While they produce valid output,
- the conversion can yield unexpected results. For example,
- input to these functions is not restricted by normal ranges,
- thus to_date('20096040','YYYYMMDD') returns
- 2014-01-17 rather than causing an error.
- Casting does not have this behavior.
-
-
-
Ordinary text is allowed in to_char
@@ -6195,7 +6192,8 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
- If the year format specification is less than four digits, e.g.
+ In to_timestamp and to_date,
+ if the year format specification is less than four digits, e.g.
YYY>, and the supplied year is less than four digits,
the year will be adjusted to be nearest to the year 2020, e.g.
95> becomes 1995.
@@ -6204,8 +6202,9 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
- The YYYY conversion from string to timestamp or
- date has a restriction when processing years with more than 4 digits. You must
+ In to_timestamp and to_date,
+ the YYYY conversion has a restriction when
+ processing years with more than 4 digits. You must
use some non-digit character or template after YYYY,
otherwise the year is always interpreted as 4 digits. For example
(with the year 20000):
@@ -6219,12 +6218,12 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
- In conversions from string to timestamp or
- date, the CC (century) field is ignored
+ In to_timestamp and to_date,
+ the CC (century) field is accepted but ignored
if there is a YYY, YYYY or
Y,YYY field. If CC is used with
- YY or Y then the year is computed
- as the year in the specified century. If the century is
+ YY or Y then the result is
+ computed as that year in the specified century. If the century is
specified but the year is not, the first year of the century
is assumed.
@@ -6232,9 +6231,19 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
- An ISO 8601 week-numbering date (as distinct from a Gregorian date)
- can be specified to to_timestamp and
- to_date in one of two ways:
+ In to_timestamp and to_date,
+ weekday names or numbers (DAY, D,
+ and related field types) are accepted but are ignored for purposes of
+ computing the result. The same is true for quarter
+ (Q) fields.
+
+
+
+
+
+ In to_timestamp and to_date,
+ an ISO 8601 week-numbering date (as distinct from a Gregorian date)
+ can be specified in one of two ways:
@@ -6276,23 +6285,24 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
- In a conversion from string to timestamp, millisecond
+ In to_timestamp, millisecond
(MS) or microsecond (US)
- values are used as the
+ fields are used as the
seconds digits after the decimal point. For example
- to_timestamp('12:3', 'SS:MS') is not 3 milliseconds,
- but 300, because the conversion counts it as 12 + 0.3 seconds.
- This means for the format SS:MS, the input values
- 12:3, 12:30, and 12:300 specify the
- same number of milliseconds. To get three milliseconds, one must use
- 12:003, which the conversion counts as
+ to_timestamp('12.3', 'SS.MS') is not 3 milliseconds,
+ but 300, because the conversion treats it as 12 + 0.3 seconds.
+ So, for the format SS.MS, the input values
+ 12.3, 12.30,
+ and 12.300 specify the
+ same number of milliseconds. To get three milliseconds, one must write
+ 12.003, which the conversion treats as
12 + 0.003 = 12.003 seconds.
Here is a more
complex example:
- to_timestamp('15:12:02.020.001230', 'HH:MI:SS.MS.US')
+ to_timestamp('15:12:02.020.001230', 'HH24:MI:SS.MS.US')
is 15 hours, 12 minutes, and 2 seconds + 20 milliseconds +
1230 microseconds = 2.021230 seconds.
@@ -6310,9 +6320,10 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
to_char(interval) formats HH> and
- HH12> as shown on a 12-hour clock, i.e. zero hours
- and 36 hours output as 12>, while HH24>
- outputs the full hour value, which can exceed 23 for intervals.
+ HH12> as shown on a 12-hour clock, for example zero hours
+ and 36 hours both output as 12>, while HH24>
+ outputs the full hour value, which can exceed 23 in
+ an interval> value.
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index bbd97dc84b..d2d23d31ff 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -3553,9 +3553,6 @@ to_date(PG_FUNCTION_ARGS)
*
* The TmFromChar is then analysed and converted into the final results in
* struct 'tm' and 'fsec'.
- *
- * This function does very little error checking, e.g.
- * to_timestamp('20096040','YYYYMMDD') works
*/
static void
do_to_timestamp(text *date_txt, text *fmt,
@@ -3564,30 +3561,35 @@ do_to_timestamp(text *date_txt, text *fmt,
FormatNode *format;
TmFromChar tmfc;
int fmt_len;
+ char *date_str;
+ int fmask;
+
+ date_str = text_to_cstring(date_txt);
ZERO_tmfc(&tmfc);
ZERO_tm(tm);
*fsec = 0;
+ fmask = 0; /* bit mask for ValidateDate() */
fmt_len = VARSIZE_ANY_EXHDR(fmt);
if (fmt_len)
{
char *fmt_str;
- char *date_str;
bool incache;
fmt_str = text_to_cstring(fmt);
- /*
- * Allocate new memory if format picture is bigger than static cache
- * and not use cache (call parser always)
- */
if (fmt_len > DCH_CACHE_SIZE)
{
- format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
+ /*
+ * Allocate new memory if the format picture is bigger than the
+ * static cache, and do not use the cache (always call the parser)
+ */
incache = FALSE;
+ format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
+
parse_format(format, fmt_str, DCH_keywords,
DCH_suff, DCH_index, DCH_TYPE, NULL);
@@ -3604,33 +3606,27 @@ do_to_timestamp(text *date_txt, text *fmt,
if ((ent = DCH_cache_search(fmt_str)) == NULL)
{
- ent = DCH_cache_getnew(fmt_str);
-
/*
* Not in the cache, must run parser and save a new
* format-picture to the cache.
*/
+ ent = DCH_cache_getnew(fmt_str);
+
parse_format(ent->format, fmt_str, DCH_keywords,
DCH_suff, DCH_index, DCH_TYPE, NULL);
(ent->format + fmt_len)->type = NODE_TYPE_END; /* Paranoia? */
-#ifdef DEBUG_TO_FROM_CHAR
- /* dump_node(ent->format, fmt_len); */
- /* dump_index(DCH_keywords, DCH_index); */
-#endif
}
format = ent->format;
}
#ifdef DEBUG_TO_FROM_CHAR
/* dump_node(format, fmt_len); */
+ /* dump_index(DCH_keywords, DCH_index); */
#endif
- date_str = text_to_cstring(date_txt);
-
DCH_from_char(format, date_str, &tmfc);
- pfree(date_str);
pfree(fmt_str);
if (!incache)
pfree(format);
@@ -3639,8 +3635,7 @@ do_to_timestamp(text *date_txt, text *fmt,
DEBUG_TMFC(&tmfc);
/*
- * Convert values that user define for FROM_CHAR (to_date/to_timestamp) to
- * standard 'tm'
+ * Convert to_date/to_timestamp input fields to standard 'tm'
*/
if (tmfc.ssss)
{
@@ -3696,19 +3691,23 @@ do_to_timestamp(text *date_txt, text *fmt,
tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
}
else
+ {
/* find century year for dates ending in "00" */
tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
+ }
}
else
- /* If a 4-digit year is provided, we use that and ignore CC. */
{
+ /* If a 4-digit year is provided, we use that and ignore CC. */
tm->tm_year = tmfc.year;
if (tmfc.bc && tm->tm_year > 0)
tm->tm_year = -(tm->tm_year - 1);
}
+ fmask |= DTK_M(YEAR);
}
- else if (tmfc.cc) /* use first year of century */
+ else if (tmfc.cc)
{
+ /* use first year of century */
if (tmfc.bc)
tmfc.cc = -tmfc.cc;
if (tmfc.cc >= 0)
@@ -3717,10 +3716,14 @@ do_to_timestamp(text *date_txt, text *fmt,
else
/* +1 because year == 599 is 600 BC */
tm->tm_year = tmfc.cc * 100 + 1;
+ fmask |= DTK_M(YEAR);
}
if (tmfc.j)
+ {
j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ fmask |= DTK_DATE_M;
+ }
if (tmfc.ww)
{
@@ -3734,6 +3737,7 @@ do_to_timestamp(text *date_txt, text *fmt,
isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
else
isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ fmask |= DTK_DATE_M;
}
else
tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
@@ -3741,14 +3745,16 @@ do_to_timestamp(text *date_txt, text *fmt,
if (tmfc.w)
tmfc.dd = (tmfc.w - 1) * 7 + 1;
- if (tmfc.d)
- tm->tm_wday = tmfc.d - 1; /* convert to native numbering */
if (tmfc.dd)
+ {
tm->tm_mday = tmfc.dd;
- if (tmfc.ddd)
- tm->tm_yday = tmfc.ddd;
+ fmask |= DTK_M(DAY);
+ }
if (tmfc.mm)
+ {
tm->tm_mon = tmfc.mm;
+ fmask |= DTK_M(MONTH);
+ }
if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
{
@@ -3771,6 +3777,7 @@ do_to_timestamp(text *date_txt, text *fmt,
j0 = isoweek2j(tm->tm_year, 1) - 1;
j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ fmask |= DTK_DATE_M;
}
else
{
@@ -3785,7 +3792,7 @@ do_to_timestamp(text *date_txt, text *fmt,
for (i = 1; i <= MONTHS_PER_YEAR; i++)
{
- if (tmfc.ddd < y[i])
+ if (tmfc.ddd <= y[i])
break;
}
if (tm->tm_mon <= 1)
@@ -3793,6 +3800,8 @@ do_to_timestamp(text *date_txt, text *fmt,
if (tm->tm_mday <= 1)
tm->tm_mday = tmfc.ddd - y[i - 1];
+
+ fmask |= DTK_M(MONTH) | DTK_M(DAY);
}
}
@@ -3808,7 +3817,38 @@ do_to_timestamp(text *date_txt, text *fmt,
*fsec += (double) tmfc.us / 1000000;
#endif
+ /* Range-check date fields according to bit mask computed above */
+ if (fmask != 0)
+ {
+ /* We already dealt with AD/BC, so pass isjulian = true */
+ int dterr = ValidateDate(fmask, true, false, false, tm);
+
+ if (dterr != 0)
+ {
+ /*
+ * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
+ * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
+ * irrelevant hint about datestyle.
+ */
+ DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
+ }
+ }
+
+ /* Range-check time fields too */
+ if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
+ tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
+ tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
+#ifdef HAVE_INT64_TIMESTAMP
+ *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC
+#else
+ *fsec < 0 || *fsec >= 1
+#endif
+ )
+ DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp");
+
DEBUG_TM(tm);
+
+ pfree(date_str);
}
diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out
index 1fe02be093..f9d12e0f8a 100644
--- a/src/test/regress/expected/horology.out
+++ b/src/test/regress/expected/horology.out
@@ -2822,6 +2822,18 @@ SELECT to_timestamp('20000-1116', 'YYYY-MMDD');
Thu Nov 16 00:00:00 20000 PST
(1 row)
+SELECT to_timestamp('1997 AD 11 16', 'YYYY BC MM DD');
+ to_timestamp
+------------------------------
+ Sun Nov 16 00:00:00 1997 PST
+(1 row)
+
+SELECT to_timestamp('1997 BC 11 16', 'YYYY BC MM DD');
+ to_timestamp
+---------------------------------
+ Tue Nov 16 00:00:00 1997 PST BC
+(1 row)
+
SELECT to_timestamp('9-1116', 'Y-MMDD');
to_timestamp
------------------------------
@@ -2906,6 +2918,18 @@ SELECT to_timestamp(' 20050302', 'YYYYMMDD');
Wed Mar 02 00:00:00 2005 PST
(1 row)
+SELECT to_timestamp('2011-12-18 11:38 AM', 'YYYY-MM-DD HH12:MI PM');
+ to_timestamp
+------------------------------
+ Sun Dec 18 11:38:00 2011 PST
+(1 row)
+
+SELECT to_timestamp('2011-12-18 11:38 PM', 'YYYY-MM-DD HH12:MI PM');
+ to_timestamp
+------------------------------
+ Sun Dec 18 23:38:00 2011 PST
+(1 row)
+
--
-- Check handling of multiple spaces in format and/or input
--
@@ -2982,7 +3006,7 @@ SELECT to_date('2011 12 18', 'YYYY MM DD');
(1 row)
--
--- Check errors for some incorrect usages of to_timestamp()
+-- Check errors for some incorrect usages of to_timestamp() and to_date()
--
-- Mixture of date conventions (ISO week and Gregorian):
SELECT to_timestamp('2005527', 'YYYYIWID');
@@ -3010,6 +3034,76 @@ DETAIL: Value must be an integer.
SELECT to_timestamp('10000000000', 'FMYYYY');
ERROR: value for "YYYY" in source string is out of range
DETAIL: Value must be in the range -2147483648 to 2147483647.
+-- Out-of-range and not-quite-out-of-range fields:
+SELECT to_timestamp('2016-06-13 25:00:00', 'YYYY-MM-DD HH24:MI:SS');
+ERROR: date/time field value out of range: "2016-06-13 25:00:00"
+SELECT to_timestamp('2016-06-13 15:60:00', 'YYYY-MM-DD HH24:MI:SS');
+ERROR: date/time field value out of range: "2016-06-13 15:60:00"
+SELECT to_timestamp('2016-06-13 15:50:60', 'YYYY-MM-DD HH24:MI:SS');
+ERROR: date/time field value out of range: "2016-06-13 15:50:60"
+SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok
+ to_timestamp
+------------------------------
+ Mon Jun 13 15:50:55 2016 PDT
+(1 row)
+
+SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH:MI:SS');
+ERROR: hour "15" is invalid for the 12-hour clock
+HINT: Use the 24-hour clock, or give an hour between 1 and 12.
+SELECT to_timestamp('2016-13-01 15:50:55', 'YYYY-MM-DD HH24:MI:SS');
+ERROR: date/time field value out of range: "2016-13-01 15:50:55"
+SELECT to_timestamp('2016-02-30 15:50:55', 'YYYY-MM-DD HH24:MI:SS');
+ERROR: date/time field value out of range: "2016-02-30 15:50:55"
+SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok
+ to_timestamp
+------------------------------
+ Mon Feb 29 15:50:55 2016 PST
+(1 row)
+
+SELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS');
+ERROR: date/time field value out of range: "2015-02-29 15:50:55"
+SELECT to_timestamp('2015-02-11 86000', 'YYYY-MM-DD SSSS'); -- ok
+ to_timestamp
+------------------------------
+ Wed Feb 11 23:53:20 2015 PST
+(1 row)
+
+SELECT to_timestamp('2015-02-11 86400', 'YYYY-MM-DD SSSS');
+ERROR: date/time field value out of range: "2015-02-11 86400"
+SELECT to_date('2016-13-10', 'YYYY-MM-DD');
+ERROR: date/time field value out of range: "2016-13-10"
+SELECT to_date('2016-02-30', 'YYYY-MM-DD');
+ERROR: date/time field value out of range: "2016-02-30"
+SELECT to_date('2016-02-29', 'YYYY-MM-DD'); -- ok
+ to_date
+------------
+ 02-29-2016
+(1 row)
+
+SELECT to_date('2015-02-29', 'YYYY-MM-DD');
+ERROR: date/time field value out of range: "2015-02-29"
+SELECT to_date('2015 365', 'YYYY DDD'); -- ok
+ to_date
+------------
+ 12-31-2015
+(1 row)
+
+SELECT to_date('2015 366', 'YYYY DDD');
+ERROR: date/time field value out of range: "2015 366"
+SELECT to_date('2016 365', 'YYYY DDD'); -- ok
+ to_date
+------------
+ 12-30-2016
+(1 row)
+
+SELECT to_date('2016 366', 'YYYY DDD'); -- ok
+ to_date
+------------
+ 12-31-2016
+(1 row)
+
+SELECT to_date('2016 367', 'YYYY DDD');
+ERROR: date/time field value out of range: "2016 367"
--
-- Check behavior with SQL-style fixed-GMT-offset time zone (cf bug #8572)
--
diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql
index c81437ba35..a7bc9dcfc4 100644
--- a/src/test/regress/sql/horology.sql
+++ b/src/test/regress/sql/horology.sql
@@ -412,6 +412,9 @@ SELECT to_timestamp('19971116', 'YYYYMMDD');
SELECT to_timestamp('20000-1116', 'YYYY-MMDD');
+SELECT to_timestamp('1997 AD 11 16', 'YYYY BC MM DD');
+SELECT to_timestamp('1997 BC 11 16', 'YYYY BC MM DD');
+
SELECT to_timestamp('9-1116', 'Y-MMDD');
SELECT to_timestamp('95-1116', 'YY-MMDD');
@@ -440,6 +443,9 @@ SELECT to_timestamp(' 2005 03 02', 'YYYYMMDD');
SELECT to_timestamp(' 20050302', 'YYYYMMDD');
+SELECT to_timestamp('2011-12-18 11:38 AM', 'YYYY-MM-DD HH12:MI PM');
+SELECT to_timestamp('2011-12-18 11:38 PM', 'YYYY-MM-DD HH12:MI PM');
+
--
-- Check handling of multiple spaces in format and/or input
--
@@ -461,7 +467,7 @@ SELECT to_date('2011 12 18', 'YYYY MM DD');
SELECT to_date('2011 12 18', 'YYYY MM DD');
--
--- Check errors for some incorrect usages of to_timestamp()
+-- Check errors for some incorrect usages of to_timestamp() and to_date()
--
-- Mixture of date conventions (ISO week and Gregorian):
@@ -482,6 +488,28 @@ SELECT to_timestamp('199711xy', 'YYYYMMDD');
-- Input that doesn't fit in an int:
SELECT to_timestamp('10000000000', 'FMYYYY');
+-- Out-of-range and not-quite-out-of-range fields:
+SELECT to_timestamp('2016-06-13 25:00:00', 'YYYY-MM-DD HH24:MI:SS');
+SELECT to_timestamp('2016-06-13 15:60:00', 'YYYY-MM-DD HH24:MI:SS');
+SELECT to_timestamp('2016-06-13 15:50:60', 'YYYY-MM-DD HH24:MI:SS');
+SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok
+SELECT to_timestamp('2016-06-13 15:50:55', 'YYYY-MM-DD HH:MI:SS');
+SELECT to_timestamp('2016-13-01 15:50:55', 'YYYY-MM-DD HH24:MI:SS');
+SELECT to_timestamp('2016-02-30 15:50:55', 'YYYY-MM-DD HH24:MI:SS');
+SELECT to_timestamp('2016-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS'); -- ok
+SELECT to_timestamp('2015-02-29 15:50:55', 'YYYY-MM-DD HH24:MI:SS');
+SELECT to_timestamp('2015-02-11 86000', 'YYYY-MM-DD SSSS'); -- ok
+SELECT to_timestamp('2015-02-11 86400', 'YYYY-MM-DD SSSS');
+SELECT to_date('2016-13-10', 'YYYY-MM-DD');
+SELECT to_date('2016-02-30', 'YYYY-MM-DD');
+SELECT to_date('2016-02-29', 'YYYY-MM-DD'); -- ok
+SELECT to_date('2015-02-29', 'YYYY-MM-DD');
+SELECT to_date('2015 365', 'YYYY DDD'); -- ok
+SELECT to_date('2015 366', 'YYYY DDD');
+SELECT to_date('2016 365', 'YYYY DDD'); -- ok
+SELECT to_date('2016 366', 'YYYY DDD'); -- ok
+SELECT to_date('2016 367', 'YYYY DDD');
+
--
-- Check behavior with SQL-style fixed-GMT-offset time zone (cf bug #8572)
--
--
cgit v1.2.3
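A hedged illustration (not part of the patch) of the SS.MS rule described
in the documentation change above:

    SELECT to_timestamp('12.3', 'SS.MS');    -- 12 + 0.3 s   = 12.300 seconds
    SELECT to_timestamp('12.003', 'SS.MS');  -- 12 + 0.003 s = 12.003 seconds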
From 6ad8ac6026287e3ccbc4d606b6ab6116ccc0eec8 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 28 Sep 2016 12:00:00 -0400
Subject: Exclude additional directories in pg_basebackup
The list of files and directories that pg_basebackup excludes from the
backup was somewhat incomplete and unorganized. Change that by having
the exclusions driven from tables. Clean up some code around it. Also
document the exclusions in more detail so that users of pg_start_backup
can make use of them as well.
The contents of these directories are now excluded from the backup:
pg_dynshmem, pg_notify, pg_serial, pg_snapshots, pg_subtrans
Also fix a bug where a symlinked pg_replslot or pg_stat_tmp would
cause a corrupt tar header to be created. Now such symlinks are
included in the backup as empty directories. Bug found by Ashutosh
Sharma.
From: David Steele
Reviewed-by: Michael Paquier
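
A hedged sketch (not part of the patch) of how a pg_start_backup user can
apply the documented exclusions; the label is illustrative:

    SELECT pg_start_backup('nightly', true);
    -- while copying the data directory, the *contents* of pg_dynshmem/,
    -- pg_notify/, pg_serial/, pg_snapshots/, pg_stat_tmp/ and pg_subtrans/
    -- can be skipped, as can any file or directory beginning with pgsql_tmp;
    -- the directories themselves must be kept
    SELECT pg_stop_backup();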
---
doc/src/sgml/backup.sgml | 16 ++
doc/src/sgml/protocol.sgml | 13 +-
doc/src/sgml/ref/pg_basebackup.sgml | 10 +-
src/backend/replication/basebackup.c | 260 +++++++++++++++++----------
src/bin/pg_basebackup/t/010_pg_basebackup.pl | 44 ++++-
5 files changed, 243 insertions(+), 100 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 0f09d82d65..95d0ff3149 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -1089,6 +1089,22 @@ SELECT pg_stop_backup();
the new master comes on line.
+
+ The contents of the directories pg_dynshmem/>,
+ pg_notify/>, pg_serial/>,
+ pg_snapshots/>, pg_stat_tmp/>,
+ and pg_subtrans/> (but not the directories themselves) can be
+ omitted from the backup as they will be initialized on postmaster startup.
+ If stats_temp_directory is set and is under the data
+ directory then the contents of that directory can also be omitted.
+
+
+
+ Any file or directory beginning with pgsql_tmp can be
+ omitted from the backup. These files are removed on postmaster start and
+ the directories will be recreated as needed.
+
+
The backup label
file includes the label string you gave to pg_start_backup>,
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 68b0941029..3384e73448 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -2069,7 +2069,9 @@ The commands accepted in walsender mode are:
- various temporary files created during the operation of the PostgreSQL server
+ Various temporary files and directories created during the operation
+ of the PostgreSQL server, such as any file or directory beginning
+ with pgsql_tmp>.
@@ -2082,13 +2084,18 @@ The commands accepted in walsender mode are:
- pg_replslot> is copied as an empty directory.
+ pg_dynshmem>, pg_notify>,
+ pg_replslot>, pg_serial>,
+ pg_snapshots>, pg_stat_tmp>, and
+ pg_subtrans> are copied as empty directories (even if
+ they are symbolic links).
Files other than regular files and directories, such as symbolic
- links and special device files, are skipped. (Symbolic links
+ links (other than for the directories listed above) and special
+ device files, are skipped. (Symbolic links
in pg_tblspc are maintained.)
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 9f1eae12d8..fe557ed002 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -610,10 +610,12 @@ PostgreSQL documentation
The backup will include all files in the data directory and tablespaces,
including the configuration files and any additional files placed in the
- directory by third parties. But only regular files and directories are
- copied. Symbolic links (other than those used for tablespaces) and special
- device files are skipped. (See for
- the precise details.)
+ directory by third parties, except certain temporary files managed by
+ PostgreSQL. But only regular files and directories are copied, except that
+ symbolic links used for tablespaces are preserved. Symbolic links pointing
+ to certain directories known to PostgreSQL are copied as empty directories.
+ Other symbolic links and special device files are skipped.
+ See for the precise details.
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index da9b7a6f0d..1eabaef492 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -30,6 +30,7 @@
#include "replication/basebackup.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
+#include "storage/dsm_impl.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "utils/builtins.h"
@@ -55,8 +56,10 @@ static int64 sendDir(char *path, int basepathlen, bool sizeonly,
static bool sendFile(char *readfilename, char *tarfilename,
struct stat * statbuf, bool missing_ok);
static void sendFileWithContent(const char *filename, const char *content);
-static void _tarWriteHeader(const char *filename, const char *linktarget,
- struct stat * statbuf);
+static int64 _tarWriteHeader(const char *filename, const char *linktarget,
+ struct stat * statbuf, bool sizeonly);
+static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
+ bool sizeonly);
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
static void base_backup_cleanup(int code, Datum arg);
@@ -94,6 +97,73 @@ static int64 elapsed_min_unit;
/* The last check of the transfer rate. */
static int64 throttled_last;
+/*
+ * The contents of these directories are removed or recreated during server
+ * start so they are not included in backups. The directories themselves are
+ * kept and included as empty to preserve access permissions.
+ */
+static const char *excludeDirContents[] =
+{
+ /*
+ * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
+ * when stats_temp_directory is set because PGSS_TEXT_FILE is always created
+ * there.
+ */
+ PG_STAT_TMP_DIR,
+
+ /*
+ * It is generally not useful to back up the contents of this directory even
+ * if the intention is to restore to another master. See backup.sgml for a
+ * more detailed description.
+ */
+ "pg_replslot",
+
+ /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
+ PG_DYNSHMEM_DIR,
+
+ /* Contents removed on startup, see AsyncShmemInit(). */
+ "pg_notify",
+
+ /*
+ * Old contents are loaded for possible debugging but are not required for
+ * normal operation, see OldSerXidInit().
+ */
+ "pg_serial",
+
+ /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
+ "pg_snapshots",
+
+ /* Contents zeroed on startup, see StartupSUBTRANS(). */
+ "pg_subtrans",
+
+ /* end of list */
+ NULL
+};
+
+/*
+ * List of files excluded from backups.
+ */
+static const char *excludeFiles[] =
+{
+ /* Skip auto conf temporary file. */
+ PG_AUTOCONF_FILENAME ".tmp",
+
+ /*
+ * If there's a backup_label or tablespace_map file, it belongs to a
+ * backup started by the user with pg_start_backup(). It is *not* correct
+ * for this backup. Our backup_label/tablespace_map is injected into the
+ * tar separately.
+ */
+ BACKUP_LABEL_FILE,
+ TABLESPACE_MAP,
+
+ "postmaster.pid",
+ "postmaster.opts",
+
+ /* end of list */
+ NULL
+};
+
/*
* Called when ERROR or FATAL happens in perform_base_backup() after
* we have started the backup - make sure we end it!
@@ -415,7 +485,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
}
/* send the WAL file itself */
- _tarWriteHeader(pathbuf, NULL, &statbuf);
+ _tarWriteHeader(pathbuf, NULL, &statbuf, false);
while ((cnt = fread(buf, 1, Min(sizeof(buf), XLogSegSize - len), fp)) > 0)
{
@@ -807,7 +877,7 @@ sendFileWithContent(const char *filename, const char *content)
statbuf.st_mode = S_IRUSR | S_IWUSR;
statbuf.st_size = len;
- _tarWriteHeader(filename, NULL, &statbuf);
+ _tarWriteHeader(filename, NULL, &statbuf, false);
/* Send the contents as a CopyData message */
pq_putmessage('d', content, len);
@@ -858,9 +928,9 @@ sendTablespace(char *path, bool sizeonly)
/* If the tablespace went away while scanning, it's no error. */
return 0;
}
- if (!sizeonly)
- _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf);
- size = 512; /* Size of the header just added */
+
+ size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
+ sizeonly);
/* Send all the files in the tablespace version directory */
size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
@@ -893,6 +963,9 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
dir = AllocateDir(path);
while ((de = ReadDir(dir, path)) != NULL)
{
+ int excludeIdx;
+ bool excludeFound;
+
/* Skip special stuff */
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
continue;
@@ -903,24 +976,6 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
strlen(PG_TEMP_FILE_PREFIX)) == 0)
continue;
- /* skip auto conf temporary file */
- if (strncmp(de->d_name,
- PG_AUTOCONF_FILENAME ".tmp",
- sizeof(PG_AUTOCONF_FILENAME) + 4) == 0)
- continue;
-
- /*
- * If there's a backup_label or tablespace_map file, it belongs to a
- * backup started by the user with pg_start_backup(). It is *not*
- * correct for this backup, our backup_label/tablespace_map is
- * injected into the tar separately.
- */
- if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0)
- continue;
-
- if (strcmp(de->d_name, TABLESPACE_MAP) == 0)
- continue;
-
/*
* Check if the postmaster has signaled us to exit, and abort with an
* error in that case. The error handler further up will call
@@ -938,13 +993,23 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
"and should not be used. "
"Try taking another online backup.")));
- snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
+ /* Scan for files that should be excluded */
+ excludeFound = false;
+ for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
+ {
+ if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
+ {
+ elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
+ excludeFound = true;
+ break;
+ }
+ }
- /* Skip postmaster.pid and postmaster.opts in the data directory */
- if (strcmp(pathbuf, "./postmaster.pid") == 0 ||
- strcmp(pathbuf, "./postmaster.opts") == 0)
+ if (excludeFound)
continue;
+ snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name);
+
/* Skip pg_control here to back up it last */
if (strcmp(pathbuf, "./global/pg_control") == 0)
continue;
@@ -957,33 +1022,34 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
errmsg("could not stat file or directory \"%s\": %m",
pathbuf)));
- /* If the file went away while scanning, it's no error. */
+ /* If the file went away while scanning, it's not an error. */
continue;
}
- /*
- * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
- * even when stats_temp_directory is set because PGSS_TEXT_FILE is
- * always created there.
- */
- if ((statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) ||
- strncmp(de->d_name, PG_STAT_TMP_DIR, strlen(PG_STAT_TMP_DIR)) == 0)
+ /* Scan for directories whose contents should be excluded */
+ excludeFound = false;
+ for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
{
- if (!sizeonly)
- _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
- size += 512;
- continue;
+ if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
+ {
+ elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
+ size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
+ excludeFound = true;
+ break;
+ }
}
+ if (excludeFound)
+ continue;
+
/*
- * Skip pg_replslot, not useful to copy. But include it as an empty
- * directory anyway, so we get permissions right.
+ * Exclude contents of directory specified by statrelpath if not set
+ * to the default (pg_stat_tmp), which is caught in the loop above.
*/
- if (strcmp(de->d_name, "pg_replslot") == 0)
+ if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
{
- if (!sizeonly)
- _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
- size += 512; /* Size of the header just added */
+ elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
+ size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
continue;
}
@@ -994,26 +1060,15 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
*/
if (strcmp(pathbuf, "./pg_xlog") == 0)
{
- if (!sizeonly)
- {
- /* If pg_xlog is a symlink, write it as a directory anyway */
-#ifndef WIN32
- if (S_ISLNK(statbuf.st_mode))
-#else
- if (pgwin32_is_junction(pathbuf))
-#endif
- statbuf.st_mode = S_IFDIR | S_IRWXU;
- _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
- }
- size += 512; /* Size of the header just added */
+ /* If pg_xlog is a symlink, write it as a directory anyway */
+ size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
/*
* Also send archive_status directory (by hackishly reusing
* statbuf from above ...).
*/
- if (!sizeonly)
- _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf);
- size += 512; /* Size of the header just added */
+ size += _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf,
+ sizeonly);
continue; /* don't recurse into pg_xlog */
}
@@ -1044,9 +1099,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
pathbuf)));
linkpath[rllen] = '\0';
- if (!sizeonly)
- _tarWriteHeader(pathbuf + basepathlen + 1, linkpath, &statbuf);
- size += 512; /* Size of the header just added */
+ size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
+ &statbuf, sizeonly);
#else
/*
@@ -1069,9 +1123,8 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
* Store a directory entry in the tar file so we can get the
* permissions right.
*/
- if (!sizeonly)
- _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf);
- size += 512; /* Size of the header just added */
+ size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
+ sizeonly);
/*
* Call ourselves recursively for a directory, unless it happens
@@ -1162,7 +1215,7 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
errmsg("could not open file \"%s\": %m", readfilename)));
}
- _tarWriteHeader(tarfilename, NULL, statbuf);
+ _tarWriteHeader(tarfilename, NULL, statbuf, false);
while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
{
@@ -1215,36 +1268,61 @@ sendFile(char *readfilename, char *tarfilename, struct stat * statbuf,
}
-static void
+static int64
_tarWriteHeader(const char *filename, const char *linktarget,
- struct stat * statbuf)
+ struct stat * statbuf, bool sizeonly)
{
char h[512];
enum tarError rc;
- rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
- statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
- statbuf->st_mtime);
-
- switch (rc)
+ if (!sizeonly)
{
- case TAR_OK:
- break;
- case TAR_NAME_TOO_LONG:
- ereport(ERROR,
- (errmsg("file name too long for tar format: \"%s\"",
- filename)));
- break;
- case TAR_SYMLINK_TOO_LONG:
- ereport(ERROR,
- (errmsg("symbolic link target too long for tar format: file name \"%s\", target \"%s\"",
- filename, linktarget)));
- break;
- default:
- elog(ERROR, "unrecognized tar error: %d", rc);
+ rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
+ statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
+ statbuf->st_mtime);
+
+ switch (rc)
+ {
+ case TAR_OK:
+ break;
+ case TAR_NAME_TOO_LONG:
+ ereport(ERROR,
+ (errmsg("file name too long for tar format: \"%s\"",
+ filename)));
+ break;
+ case TAR_SYMLINK_TOO_LONG:
+ ereport(ERROR,
+ (errmsg("symbolic link target too long for tar format: "
+ "file name \"%s\", target \"%s\"",
+ filename, linktarget)));
+ break;
+ default:
+ elog(ERROR, "unrecognized tar error: %d", rc);
+ }
+
+ pq_putmessage('d', h, sizeof(h));
}
- pq_putmessage('d', h, 512);
+ return sizeof(h);
+}
+
+/*
+ * Write tar header for a directory. If the entry in statbuf is a link then
+ * write it as a directory anyway.
+ */
+static int64
+_tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
+ bool sizeonly)
+{
+ /* If symlink, write it as a directory anyway */
+#ifndef WIN32
+ if (S_ISLNK(statbuf->st_mode))
+#else
+ if (pgwin32_is_junction(pathbuf))
+#endif
+ statbuf->st_mode = S_IFDIR | S_IRWXU;
+
+ return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
}
/*
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index fd9857d67b..a52bd4e124 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
-use Test::More tests => 54;
+use Test::More tests => 67;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@@ -55,15 +55,43 @@ print CONF "wal_level = replica\n";
close CONF;
$node->restart;
+# Write some files to test that they are not copied.
+foreach my $filename (qw(backup_label tablespace_map postgresql.auto.conf.tmp))
+{
+ open FILE, ">>$pgdata/$filename";
+ print FILE "DONOTCOPY";
+ close FILE;
+}
+
$node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup" ],
'pg_basebackup runs');
ok(-f "$tempdir/backup/PG_VERSION", 'backup was created');
+# Only archive_status directory should be copied in pg_xlog/.
is_deeply(
[ sort(slurp_dir("$tempdir/backup/pg_xlog/")) ],
[ sort qw(. .. archive_status) ],
'no WAL files copied');
+# Contents of these directories should not be copied.
+foreach my $dirname (qw(pg_dynshmem pg_notify pg_replslot pg_serial pg_snapshots pg_stat_tmp pg_subtrans))
+{
+ is_deeply(
+ [ sort(slurp_dir("$tempdir/backup/$dirname/")) ],
+ [ sort qw(. ..) ],
+ "contents of $dirname/ not copied");
+}
+
+# These files should not be copied.
+foreach my $filename (qw(postgresql.auto.conf.tmp postmaster.opts postmaster.pid tablespace_map))
+{
+ ok(! -f "$tempdir/backup/$filename", "$filename not copied");
+}
+
+# Make sure existing backup_label was ignored.
+isnt(slurp_file("$tempdir/backup/backup_label"), 'DONOTCOPY',
+ 'existing backup_label not copied');
+
$node->command_ok(
[ 'pg_basebackup', '-D', "$tempdir/backup2", '--xlogdir',
"$tempdir/xlog2" ],
@@ -110,7 +138,17 @@ unlink "$pgdata/$superlongname";
# skip on Windows.
SKIP:
{
- skip "symlinks not supported on Windows", 10 if ($windows_os);
+ skip "symlinks not supported on Windows", 11 if ($windows_os);
+
+ # Move pg_replslot out of $pgdata and create a symlink to it.
+ $node->stop;
+
+ rename("$pgdata/pg_replslot", "$tempdir/pg_replslot")
+ or BAIL_OUT "could not move $pgdata/pg_replslot";
+ symlink("$tempdir/pg_replslot", "$pgdata/pg_replslot")
+ or BAIL_OUT "could not symlink to $pgdata/pg_replslot";
+
+ $node->start;
# Create a temporary directory in the system location and symlink it
# to our physical temp location. That way we can use shorter names
@@ -148,6 +186,8 @@ SKIP:
"tablespace symlink was updated");
closedir $dh;
+ ok(-d "$tempdir/backup1/pg_replslot", 'pg_replslot symlink copied as directory');
+
mkdir "$tempdir/tbl=spc2";
$node->safe_psql('postgres', "DROP TABLE test1;");
$node->safe_psql('postgres', "DROP TABLESPACE tblspc1;");
--
cgit v1.2.3
From 6ed2d8584cc680a2d6898480de74a57cd96176b5 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 29 Sep 2016 12:00:00 -0400
Subject: pg_basebackup: Add --nosync option
This is useful for testing, similar to initdb's --nosync.
From: Michael Paquier
---
doc/src/sgml/ref/pg_basebackup.sgml | 15 +++++++++++++++
src/bin/pg_basebackup/pg_basebackup.c | 28 +++++++++++++++++++---------
src/bin/pg_basebackup/pg_receivexlog.c | 1 +
src/bin/pg_basebackup/receivelog.c | 34 ++++++++++++++++++----------------
src/bin/pg_basebackup/receivelog.h | 4 +++-
5 files changed, 56 insertions(+), 26 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index fe557ed002..55e913f70d 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -438,6 +438,21 @@ PostgreSQL documentation
+
+
-N
+
--nosync
+
+
+ By default, pg_basebackup will wait for all files
+ to be written safely to disk. This option causes
+ pg_basebackup to return without waiting, which is
+ faster, but means that a subsequent operating system crash can leave
+ the base backup corrupt. Generally, this option is useful for testing
+ but should not be used when creating a production installation.
+
+
+
+
-v
--verbose
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index cd7d095103..0f5d9d6a87 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -69,6 +69,7 @@ static bool includewal = false;
static bool streamwal = false;
static bool fastcheckpoint = false;
static bool writerecoveryconf = false;
+static bool do_sync = true;
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
static pg_time_t last_progress_report = 0;
static int32 maxrate = 0; /* no limit by default */
@@ -329,6 +330,7 @@ usage(void)
" set fast or spread checkpointing\n"));
printf(_(" -l, --label=LABEL set backup label\n"));
printf(_(" -n, --noclean do not clean up after errors\n"));
+ printf(_(" -N, --nosync do not wait for changes to be written safely to disk\n"));
printf(_(" -P, --progress show progress information\n"));
printf(_(" -v, --verbose output verbose messages\n"));
printf(_(" -V, --version output version information, then exit\n"));
@@ -460,6 +462,7 @@ LogStreamerMain(logstreamer_param *param)
stream.stream_stop = reached_end_position;
stream.standby_message_timeout = standby_message_timeout;
stream.synchronous = false;
+ stream.do_sync = do_sync;
stream.mark_done = true;
stream.basedir = param->xlogdir;
stream.partial_suffix = NULL;
@@ -1199,7 +1202,7 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum)
PQfreemem(copybuf);
/* sync the resulting tar file, errors are not considered fatal */
- if (strcmp(basedir, "-") != 0)
+ if (do_sync && strcmp(basedir, "-") != 0)
(void) fsync_fname(filename, false, progname);
}
@@ -1967,14 +1970,17 @@ BaseBackup(void)
* all the data of the base directory is synced, taking into account
* all the tablespaces. Errors are not considered fatal.
*/
- if (format == 't')
+ if (do_sync)
{
- if (strcmp(basedir, "-") != 0)
- (void) fsync_fname(basedir, true, progname);
- }
- else
- {
- (void) fsync_pgdata(basedir, progname);
+ if (format == 't')
+ {
+ if (strcmp(basedir, "-") != 0)
+ (void) fsync_fname(basedir, true, progname);
+ }
+ else
+ {
+ (void) fsync_pgdata(basedir, progname);
+ }
}
if (verbose)
@@ -2001,6 +2007,7 @@ main(int argc, char **argv)
{"compress", required_argument, NULL, 'Z'},
{"label", required_argument, NULL, 'l'},
{"noclean", no_argument, NULL, 'n'},
+ {"nosync", no_argument, NULL, 'N'},
{"dbname", required_argument, NULL, 'd'},
{"host", required_argument, NULL, 'h'},
{"port", required_argument, NULL, 'p'},
@@ -2037,7 +2044,7 @@ main(int argc, char **argv)
atexit(cleanup_directories_atexit);
- while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:nzZ:d:c:h:p:U:s:S:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:nNzZ:d:c:h:p:U:s:S:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -2115,6 +2122,9 @@ main(int argc, char **argv)
case 'n':
noclean = true;
break;
+ case 'N':
+ do_sync = false;
+ break;
case 'z':
#ifdef HAVE_LIBZ
compresslevel = Z_DEFAULT_COMPRESSION;
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
index 7f7ee9dc9b..a58a251a59 100644
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ b/src/bin/pg_basebackup/pg_receivexlog.c
@@ -336,6 +336,7 @@ StreamLog(void)
stream.stream_stop = stop_streaming;
stream.standby_message_timeout = standby_message_timeout;
stream.synchronous = synchronous;
+ stream.do_sync = true;
stream.mark_done = false;
stream.basedir = basedir;
stream.partial_suffix = ".partial";
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c
index 6b78a60f27..8f29d19114 100644
--- a/src/bin/pg_basebackup/receivelog.c
+++ b/src/bin/pg_basebackup/receivelog.c
@@ -41,8 +41,8 @@ static PGresult *HandleCopyStream(PGconn *conn, StreamCtl *stream,
XLogRecPtr *stoppos);
static int CopyStreamPoll(PGconn *conn, long timeout_ms);
static int CopyStreamReceive(PGconn *conn, long timeout, char **buffer);
-static bool ProcessKeepaliveMsg(PGconn *conn, char *copybuf, int len,
- XLogRecPtr blockpos, int64 *last_status);
+static bool ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf,
+ int len, XLogRecPtr blockpos, int64 *last_status);
static bool ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
XLogRecPtr *blockpos);
static PGresult *HandleEndOfCopyStream(PGconn *conn, StreamCtl *stream, char *copybuf,
@@ -56,7 +56,7 @@ static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos,
uint32 *timeline);
static bool
-mark_file_as_archived(const char *basedir, const char *fname)
+mark_file_as_archived(const char *basedir, const char *fname, bool do_sync)
{
int fd;
static char tmppath[MAXPGPATH];
@@ -74,10 +74,10 @@ mark_file_as_archived(const char *basedir, const char *fname)
close(fd);
- if (fsync_fname(tmppath, false, progname) != 0)
+ if (do_sync && fsync_fname(tmppath, false, progname) != 0)
return false;
- if (fsync_parent_path(tmppath, progname) != 0)
+ if (do_sync && fsync_parent_path(tmppath, progname) != 0)
return false;
return true;
@@ -134,9 +134,9 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
* fsync, in case of a previous crash between padding and fsyncing the
* file.
*/
- if (fsync_fname(fn, false, progname) != 0)
+ if (stream->do_sync && fsync_fname(fn, false, progname) != 0)
return false;
- if (fsync_parent_path(fn, progname) != 0)
+ if (stream->do_sync && fsync_parent_path(fn, progname) != 0)
return false;
return true;
@@ -173,9 +173,9 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
* using synchronous mode, where the file is modified and fsynced
* in-place, without a directory fsync.
*/
- if (fsync_fname(fn, false, progname) != 0)
+ if (stream->do_sync && fsync_fname(fn, false, progname) != 0)
return false;
- if (fsync_parent_path(fn, progname) != 0)
+ if (stream->do_sync && fsync_parent_path(fn, progname) != 0)
return false;
if (lseek(f, SEEK_SET, 0) != 0)
@@ -212,7 +212,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos)
return false;
}
- if (fsync(walfile) != 0)
+ if (stream->do_sync && fsync(walfile) != 0)
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, current_walfile_name, strerror(errno));
@@ -258,7 +258,8 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos)
if (currpos == XLOG_SEG_SIZE && stream->mark_done)
{
/* writes error message if failed */
- if (!mark_file_as_archived(stream->basedir, current_walfile_name))
+ if (!mark_file_as_archived(stream->basedir, current_walfile_name,
+ stream->do_sync))
return false;
}
@@ -378,7 +379,8 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
if (stream->mark_done)
{
/* writes error message if failed */
- if (!mark_file_as_archived(stream->basedir, histfname))
+ if (!mark_file_as_archived(stream->basedir, histfname,
+ stream->do_sync))
return false;
}
@@ -836,7 +838,7 @@ HandleCopyStream(PGconn *conn, StreamCtl *stream,
*/
if (stream->synchronous && lastFlushPosition < blockpos && walfile != -1)
{
- if (fsync(walfile) != 0)
+ if (stream->do_sync && fsync(walfile) != 0)
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, current_walfile_name, strerror(errno));
@@ -890,7 +892,7 @@ HandleCopyStream(PGconn *conn, StreamCtl *stream,
/* Check the message type. */
if (copybuf[0] == 'k')
{
- if (!ProcessKeepaliveMsg(conn, copybuf, r, blockpos,
+ if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos,
&last_status))
goto error;
}
@@ -1043,7 +1045,7 @@ CopyStreamReceive(PGconn *conn, long timeout, char **buffer)
* Process the keepalive message.
*/
static bool
-ProcessKeepaliveMsg(PGconn *conn, char *copybuf, int len,
+ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
XLogRecPtr blockpos, int64 *last_status)
{
int pos;
@@ -1079,7 +1081,7 @@ ProcessKeepaliveMsg(PGconn *conn, char *copybuf, int len,
* data has been successfully replicated or not, at the normal
* shutdown of the server.
*/
- if (fsync(walfile) != 0)
+ if (stream->do_sync && fsync(walfile) != 0)
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
progname, current_walfile_name, strerror(errno));
diff --git a/src/bin/pg_basebackup/receivelog.h b/src/bin/pg_basebackup/receivelog.h
index 554ff8b5b2..7a3bbc5080 100644
--- a/src/bin/pg_basebackup/receivelog.h
+++ b/src/bin/pg_basebackup/receivelog.h
@@ -34,8 +34,10 @@ typedef struct StreamCtl
* timeline */
int standby_message_timeout; /* Send status messages this
* often */
- bool synchronous; /* Flush data on write */
+ bool synchronous; /* Flush WAL data immediately on write */
bool mark_done; /* Mark segment as done in generated archive */
+ bool do_sync; /* Flush to disk to ensure consistent state
+ * of data */
stream_stop_callback stream_stop; /* Stop streaming when returns true */
--
cgit v1.2.3
From 8e91e12bc3af85ba2287866669268f6825d2cc03 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 29 Sep 2016 13:32:27 -0400
Subject: Allow contrib/file_fdw to read from a program, like COPY FROM
PROGRAM.
This patch just exposes COPY's FROM PROGRAM option in contrib/file_fdw.
There don't seem to be any security issues with that that are any worse
than what already exist with file_fdw and COPY; as in the existing cases,
only superusers are allowed to control what gets executed.
A regression test case might be nice here, but choosing a 100% portable
command to run is hard. (We haven't got a test for COPY FROM PROGRAM
itself, either.)
Corey Huinker and Adam Gomaa, reviewed by Amit Langote
Discussion:
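
A hedged sketch (not part of the patch) of the new option; the server,
table, and command names are hypothetical, and setting the option requires
superuser:

    CREATE EXTENSION file_fdw;
    CREATE SERVER prog_server FOREIGN DATA WRAPPER file_fdw;
    CREATE FOREIGN TABLE passwd_lines (line text)
      SERVER prog_server
      OPTIONS (program 'cat /etc/passwd', format 'text');
    SELECT count(*) FROM passwd_lines;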
---
contrib/file_fdw/file_fdw.c | 131 +++++++++++++++++++++-----------
contrib/file_fdw/output/file_fdw.source | 6 +-
doc/src/sgml/file-fdw.sgml | 66 +++++++++++-----
3 files changed, 137 insertions(+), 66 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index b471991318..d325f53467 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* file_fdw.c
- * foreign-data wrapper for server-side flat files.
+ * foreign-data wrapper for server-side flat files (or programs).
*
* Copyright (c) 2010-2016, PostgreSQL Global Development Group
*
@@ -57,8 +57,9 @@ struct FileFdwOption
* fileGetOptions(), which currently doesn't bother to look at user mappings.
*/
static const struct FileFdwOption valid_options[] = {
- /* File options */
+ /* Data source options */
{"filename", ForeignTableRelationId},
+ {"program", ForeignTableRelationId},
/* Format options */
/* oids option is not supported */
@@ -85,10 +86,12 @@ static const struct FileFdwOption valid_options[] = {
*/
typedef struct FileFdwPlanState
{
- char *filename; /* file to read */
- List *options; /* merged COPY options, excluding filename */
+ char *filename; /* file or program to read from */
+ bool is_program; /* true if filename represents an OS command */
+ List *options; /* merged COPY options, excluding filename and
+ * is_program */
BlockNumber pages; /* estimate of file's physical size */
- double ntuples; /* estimate of number of rows in file */
+ double ntuples; /* estimate of number of data rows */
} FileFdwPlanState;
/*
@@ -96,9 +99,11 @@ typedef struct FileFdwPlanState
*/
typedef struct FileFdwExecutionState
{
- char *filename; /* file to read */
- List *options; /* merged COPY options, excluding filename */
- CopyState cstate; /* state of reading file */
+ char *filename; /* file or program to read from */
+ bool is_program; /* true if filename represents an OS command */
+ List *options; /* merged COPY options, excluding filename and
+ * is_program */
+ CopyState cstate; /* COPY execution state */
} FileFdwExecutionState;
/*
@@ -139,7 +144,9 @@ static bool fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
*/
static bool is_valid_option(const char *option, Oid context);
static void fileGetOptions(Oid foreigntableid,
- char **filename, List **other_options);
+ char **filename,
+ bool *is_program,
+ List **other_options);
static List *get_file_fdw_attribute_options(Oid relid);
static bool check_selective_binary_conversion(RelOptInfo *baserel,
Oid foreigntableid,
@@ -196,16 +203,16 @@ file_fdw_validator(PG_FUNCTION_ARGS)
/*
* Only superusers are allowed to set options of a file_fdw foreign table.
- * This is because the filename is one of those options, and we don't want
- * non-superusers to be able to determine which file gets read.
+ * This is because we don't want non-superusers to be able to control
+ * which file gets read or which program gets executed.
*
* Putting this sort of permissions check in a validator is a bit of a
* crock, but there doesn't seem to be any other place that can enforce
* the check more cleanly.
*
- * Note that the valid_options[] array disallows setting filename at any
- * options level other than foreign table --- otherwise there'd still be a
- * security hole.
+ * Note that the valid_options[] array disallows setting filename and
+ * program at any options level other than foreign table --- otherwise
+ * there'd still be a security hole.
*/
if (catalog == ForeignTableRelationId && !superuser())
ereport(ERROR,
@@ -247,11 +254,11 @@ file_fdw_validator(PG_FUNCTION_ARGS)
}
/*
- * Separate out filename and column-specific options, since
+ * Separate out filename, program, and column-specific options, since
* ProcessCopyOptions won't accept them.
*/
-
- if (strcmp(def->defname, "filename") == 0)
+ if (strcmp(def->defname, "filename") == 0 ||
+ strcmp(def->defname, "program") == 0)
{
if (filename)
ereport(ERROR,
@@ -296,12 +303,13 @@ file_fdw_validator(PG_FUNCTION_ARGS)
ProcessCopyOptions(NULL, NULL, true, other_options);
/*
- * Filename option is required for file_fdw foreign tables.
+ * Either filename or program option is required for file_fdw foreign
+ * tables.
*/
if (catalog == ForeignTableRelationId && filename == NULL)
ereport(ERROR,
(errcode(ERRCODE_FDW_DYNAMIC_PARAMETER_VALUE_NEEDED),
- errmsg("filename is required for file_fdw foreign tables")));
+ errmsg("either filename or program is required for file_fdw foreign tables")));
PG_RETURN_VOID();
}
@@ -326,12 +334,12 @@ is_valid_option(const char *option, Oid context)
/*
* Fetch the options for a file_fdw foreign table.
*
- * We have to separate out "filename" from the other options because
- * it must not appear in the options list passed to the core COPY code.
+ * We have to separate out filename/program from the other options because
+ * those must not appear in the options list passed to the core COPY code.
*/
static void
fileGetOptions(Oid foreigntableid,
- char **filename, List **other_options)
+ char **filename, bool *is_program, List **other_options)
{
ForeignTable *table;
ForeignServer *server;
@@ -359,9 +367,11 @@ fileGetOptions(Oid foreigntableid,
options = list_concat(options, get_file_fdw_attribute_options(foreigntableid));
/*
- * Separate out the filename.
+ * Separate out the filename or program option (we assume there is only
+ * one).
*/
*filename = NULL;
+ *is_program = false;
prev = NULL;
foreach(lc, options)
{
@@ -373,15 +383,22 @@ fileGetOptions(Oid foreigntableid,
options = list_delete_cell(options, lc, prev);
break;
}
+ else if (strcmp(def->defname, "program") == 0)
+ {
+ *filename = defGetString(def);
+ *is_program = true;
+ options = list_delete_cell(options, lc, prev);
+ break;
+ }
prev = lc;
}
/*
- * The validator should have checked that a filename was included in the
- * options, but check again, just in case.
+ * The validator should have checked that filename or program was included
+ * in the options, but check again, just in case.
*/
if (*filename == NULL)
- elog(ERROR, "filename is required for file_fdw foreign tables");
+ elog(ERROR, "either filename or program is required for file_fdw foreign tables");
*other_options = options;
}
@@ -475,12 +492,15 @@ fileGetForeignRelSize(PlannerInfo *root,
FileFdwPlanState *fdw_private;
/*
- * Fetch options. We only need filename at this point, but we might as
- * well get everything and not need to re-fetch it later in planning.
+ * Fetch options. We only need filename (or program) at this point, but
+ * we might as well get everything and not need to re-fetch it later in
+ * planning.
*/
fdw_private = (FileFdwPlanState *) palloc(sizeof(FileFdwPlanState));
fileGetOptions(foreigntableid,
- &fdw_private->filename, &fdw_private->options);
+ &fdw_private->filename,
+ &fdw_private->is_program,
+ &fdw_private->options);
baserel->fdw_private = (void *) fdw_private;
/* Estimate relation size */
@@ -583,20 +603,25 @@ static void
fileExplainForeignScan(ForeignScanState *node, ExplainState *es)
{
char *filename;
+ bool is_program;
List *options;
- /* Fetch options --- we only need filename at this point */
+ /* Fetch options --- we only need filename and is_program at this point */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
- &filename, &options);
+ &filename, &is_program, &options);
- ExplainPropertyText("Foreign File", filename, es);
+ if (is_program)
+ ExplainPropertyText("Foreign Program", filename, es);
+ else
+ ExplainPropertyText("Foreign File", filename, es);
/* Suppress file size if we're not showing cost details */
if (es->costs)
{
struct stat stat_buf;
- if (stat(filename, &stat_buf) == 0)
+ if (!is_program &&
+ stat(filename, &stat_buf) == 0)
ExplainPropertyLong("Foreign File Size", (long) stat_buf.st_size,
es);
}
@@ -611,6 +636,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
{
ForeignScan *plan = (ForeignScan *) node->ss.ps.plan;
char *filename;
+ bool is_program;
List *options;
CopyState cstate;
FileFdwExecutionState *festate;
@@ -623,7 +649,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
/* Fetch options of foreign table */
fileGetOptions(RelationGetRelid(node->ss.ss_currentRelation),
- &filename, &options);
+ &filename, &is_program, &options);
/* Add any options from the plan (currently only convert_selectively) */
options = list_concat(options, plan->fdw_private);
@@ -635,7 +661,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
filename,
- false,
+ is_program,
NIL,
options);
@@ -645,6 +671,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
*/
festate = (FileFdwExecutionState *) palloc(sizeof(FileFdwExecutionState));
festate->filename = filename;
+ festate->is_program = is_program;
festate->options = options;
festate->cstate = cstate;
@@ -709,7 +736,7 @@ fileReScanForeignScan(ForeignScanState *node)
festate->cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
festate->filename,
- false,
+ festate->is_program,
NIL,
festate->options);
}
@@ -738,11 +765,22 @@ fileAnalyzeForeignTable(Relation relation,
BlockNumber *totalpages)
{
char *filename;
+ bool is_program;
List *options;
struct stat stat_buf;
/* Fetch options of foreign table */
- fileGetOptions(RelationGetRelid(relation), &filename, &options);
+ fileGetOptions(RelationGetRelid(relation), &filename, &is_program, &options);
+
+ /*
+ * If this is a program instead of a file, just return false to skip
+ * analyzing the table. We could run the program and collect stats on
+ * whatever it currently returns, but it seems likely that in such cases
+ * the output would be too volatile for the stats to be useful. Maybe
+ * there should be an option to enable doing this?
+ */
+ if (is_program)
+ return false;
/*
* Get size of the file. (XXX if we fail here, would it be better to just
@@ -769,8 +807,8 @@ fileAnalyzeForeignTable(Relation relation,
/*
* fileIsForeignScanParallelSafe
- * Reading a file in a parallel worker should work just the same as
- * reading it in the leader, so mark scans safe.
+ * Reading a file, or external program, in a parallel worker should work
+ * just the same as reading it in the leader, so mark scans safe.
*/
static bool
fileIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel,
@@ -916,9 +954,10 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
/*
* Get size of the file. It might not be there at plan time, though, in
- * which case we have to use a default estimate.
+ * which case we have to use a default estimate. We also have to fall
+ * back to the default if using a program as the input.
*/
- if (stat(fdw_private->filename, &stat_buf) < 0)
+ if (fdw_private->is_program || stat(fdw_private->filename, &stat_buf) < 0)
stat_buf.st_size = 10 * BLCKSZ;
/*
@@ -1000,6 +1039,11 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
* that I/O costs are equivalent to a regular table file of the same size.
* However, we take per-tuple CPU costs as 10x of a seqscan, to account
* for the cost of parsing records.
+ *
+ * In the case of a program source, this calculation is even more divorced
+ * from reality, but we have no good alternative; and it's not clear that
+ * the numbers we produce here matter much anyway, since there's only one
+ * access path for the rel.
*/
run_cost += seq_page_cost * pages;
@@ -1036,6 +1080,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
bool *nulls;
bool found;
char *filename;
+ bool is_program;
List *options;
CopyState cstate;
ErrorContextCallback errcallback;
@@ -1050,12 +1095,12 @@ file_acquire_sample_rows(Relation onerel, int elevel,
nulls = (bool *) palloc(tupDesc->natts * sizeof(bool));
/* Fetch options of foreign table */
- fileGetOptions(RelationGetRelid(onerel), &filename, &options);
+ fileGetOptions(RelationGetRelid(onerel), &filename, &is_program, &options);
/*
* Create CopyState from FDW options.
*/
- cstate = BeginCopyFrom(NULL, onerel, filename, false, NIL, options);
+ cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NIL, options);
/*
* Use per-tuple memory context to prevent leak of memory used to read
diff --git a/contrib/file_fdw/output/file_fdw.source b/contrib/file_fdw/output/file_fdw.source
index 6fa54409b9..01e2690a82 100644
--- a/contrib/file_fdw/output/file_fdw.source
+++ b/contrib/file_fdw/output/file_fdw.source
@@ -76,7 +76,7 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'csv', null '
'); -- ERROR
ERROR: COPY null representation cannot use newline or carriage return
CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR
-ERROR: filename is required for file_fdw foreign tables
+ERROR: either filename or program is required for file_fdw foreign tables
CREATE FOREIGN TABLE agg_text (
a int2 CHECK (a >= 0),
b float4
@@ -132,7 +132,7 @@ ERROR: invalid option "force_not_null"
HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_not_null '*'); -- ERROR
ERROR: invalid option "force_not_null"
-HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
+HINT: Valid options in this context are: filename, program, format, header, delimiter, quote, escape, null, encoding
-- force_null is not allowed to be specified at any foreign object level:
ALTER FOREIGN DATA WRAPPER file_fdw OPTIONS (ADD force_null '*'); -- ERROR
ERROR: invalid option "force_null"
@@ -145,7 +145,7 @@ ERROR: invalid option "force_null"
HINT: There are no valid options in this context.
CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (force_null '*'); -- ERROR
ERROR: invalid option "force_null"
-HINT: Valid options in this context are: filename, format, header, delimiter, quote, escape, null, encoding
+HINT: Valid options in this context are: filename, program, format, header, delimiter, quote, escape, null, encoding
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
a | b
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index d3b39aa120..309a303e03 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -10,10 +10,11 @@
The file_fdw> module provides the foreign-data wrapper
file_fdw, which can be used to access data
- files in the server's file system. Data files must be in a format
+ files in the server's file system, or to execute programs on the server
+ and read their output. The data file or program output must be in a format
that can be read by COPY FROM;
see the description of COPY for details.
- Access to such data files is currently read-only.
+ Access to data files is currently read-only.
@@ -27,7 +28,22 @@
- Specifies the file to be read. Required. Must be an absolute path name.
+ Specifies the file to be read. Must be an absolute path name.
+ Either filename or program must be
+ specified, but not both.
+
+
+
+
+
+ program
+
+
+
+ Specifies the command to be executed. The standard output of this
+ command will be read as though COPY FROM PROGRAM> were used.
+ Either program or filename must be
+ specified, but not both.
@@ -37,7 +53,7 @@
- Specifies the file's format,
+ Specifies the data format,
the same as COPY>'s FORMAT option.
@@ -48,7 +64,7 @@
- Specifies whether the file has a header line,
+ Specifies whether the data has a header line,
the same as COPY>'s HEADER option.
@@ -59,7 +75,7 @@
- Specifies the file's delimiter character,
+ Specifies the data delimiter character,
the same as COPY>'s DELIMITER option.
@@ -70,7 +86,7 @@
- Specifies the file's quote character,
+ Specifies the data quote character,
the same as COPY>'s QUOTE option.
@@ -81,7 +97,7 @@
- Specifies the file's escape character,
+ Specifies the data escape character,
the same as COPY>'s ESCAPE option.
@@ -92,7 +108,7 @@
- Specifies the file's null string,
+ Specifies the data null string,
the same as COPY>'s NULL option.
@@ -103,7 +119,7 @@
- Specifies the file's encoding,
+ Specifies the data encoding,
the same as COPY>'s ENCODING option.
@@ -112,11 +128,11 @@
- Note that while COPY> allows options such as OIDS and HEADER
- to be specified without a corresponding value, the foreign data wrapper
+ Note that while COPY> allows options such as HEADER>
+ to be specified without a corresponding value, the foreign table option
syntax requires a value to be present in all cases. To activate
- COPY> options normally supplied without a value, you can
- instead pass the value TRUE.
+ COPY> options typically written without a value, you can pass
+ the value TRUE, since all such options are Booleans.
@@ -133,7 +149,7 @@
This is a Boolean option. If true, it specifies that values of the
column should not be matched against the null string (that is, the
- file-level null option). This has the same effect
+ table-level null option). This has the same effect
as listing the column in COPY>'s
FORCE_NOT_NULL option.
@@ -171,14 +187,24 @@
Changing table-level options requires superuser privileges, for security
- reasons: only a superuser should be able to determine which file is read.
- In principle non-superusers could be allowed to change the other options,
- but that's not supported at present.
+ reasons: only a superuser should be able to control which file is read
+ or which program is run. In principle non-superusers could be allowed to
+ change the other options, but that's not supported at present.
+
+
+
+ When specifying the program> option, keep in mind that the option
+ string is executed by the shell. If you need to pass any arguments to the
+ command that come from an untrusted source, you must be careful to strip or
+ escape any characters that might have special meaning to the shell.
+ For security reasons, it is best to use a fixed command string, or at least
+ avoid passing any user input in it.
For a foreign table using file_fdw>, EXPLAIN> shows
- the name of the file to be read. Unless COSTS OFF> is
+ the name of the file to be read or program to be run.
+ For a file, unless COSTS OFF> is
specified, the file size (in bytes) is shown as well.
@@ -186,7 +212,7 @@
Create a Foreign Table for PostgreSQL CSV Logs
- One of the obvious uses for the file_fdw> is to make
+ One of the obvious uses for file_fdw> is to make
the PostgreSQL activity log available as a table for querying. To
do this, first you must be logging to a CSV file, which here we
will call pglog.csv>. First, install file_fdw>
--
cgit v1.2.3
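As a usage sketch of the option this patch adds (the server, table, and command
strings below are illustrative, not taken from the patch):

    -- Assumes file_fdw is installed; names and paths are examples only.
    CREATE SERVER file_server FOREIGN DATA WRAPPER file_fdw;
    CREATE FOREIGN TABLE words (w text)
      SERVER file_server
      OPTIONS (program 'gunzip -c /usr/share/data/words.gz', format 'text');
    -- EXPLAIN now labels the source "Foreign Program" rather than "Foreign File".
    EXPLAIN SELECT count(*) FROM words;

Per the security note above, the program string is executed by the shell, so
untrusted input must never be interpolated into it.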
From fd321a1dfd64d30bf1652ea6b39b654304f68ae4 Mon Sep 17 00:00:00 2001
From: Stephen Frost
Date: Thu, 29 Sep 2016 22:13:38 -0400
Subject: Remove superuser checks in pgstattuple
Now that we track initial privileges on extension objects and changes to
those permissions, we can drop the superuser() checks from the various
functions which are part of the pgstattuple extension and rely on the
GRANT system to control access to those functions.
Since a pg_upgrade will preserve the version of the extension which
existed prior to the upgrade, we can't simply modify the existing
functions but instead need to create new functions which remove the
checks and update the SQL-level functions to use the new functions
(and to REVOKE EXECUTE rights on those functions from PUBLIC).
Thanks to Tom and Andres for adding support for extensions to follow
update paths (see: 40b449a), allowing this patch to be much smaller
since no new base version script needed to be included.
Approach suggested by Noah.
Reviewed by Michael Paquier.
---
contrib/pgstattuple/Makefile | 7 +-
contrib/pgstattuple/pgstatapprox.c | 39 ++++++--
contrib/pgstattuple/pgstatindex.c | 122 ++++++++++++++++++++++++--
contrib/pgstattuple/pgstattuple--1.4--1.5.sql | 111 +++++++++++++++++++++++
contrib/pgstattuple/pgstattuple.c | 41 +++++++++
contrib/pgstattuple/pgstattuple.control | 2 +-
doc/src/sgml/pgstattuple.sgml | 8 ++
7 files changed, 315 insertions(+), 15 deletions(-)
create mode 100644 contrib/pgstattuple/pgstattuple--1.4--1.5.sql
(limited to 'doc/src')
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index e732680dea..294077d4c1 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -4,9 +4,10 @@ MODULE_big = pgstattuple
OBJS = pgstattuple.o pgstatindex.o pgstatapprox.o $(WIN32RES)
EXTENSION = pgstattuple
-DATA = pgstattuple--1.4.sql pgstattuple--1.3--1.4.sql \
- pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql \
- pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.4.sql pgstattuple--1.4--1.5.sql \
+ pgstattuple--1.3--1.4.sql pgstattuple--1.2--1.3.sql \
+ pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql \
+ pgstattuple--unpackaged--1.0.sql
PGFILEDESC = "pgstattuple - tuple-level statistics"
REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index a49ff543d2..f524fc4e30 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -29,6 +29,9 @@
#include "commands/vacuum.h"
PG_FUNCTION_INFO_V1(pgstattuple_approx);
+PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
+
+Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);
typedef struct output_type
{
@@ -204,11 +207,42 @@ statapprox_heap(Relation rel, output_type *stat)
/*
* Returns estimated live/dead tuple statistics for the given relid.
+ *
+ * The superuser() check here must be kept as the library might be upgraded
+ * without the extension being upgraded, meaning that in pre-1.5 installations
+ * these functions could be called by any user.
*/
Datum
pgstattuple_approx(PG_FUNCTION_ARGS)
{
Oid relid = PG_GETARG_OID(0);
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use pgstattuple functions"))));
+
+ PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
+}
+
+/*
+ * As of pgstattuple version 1.5, we no longer need to check if the user
+ * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
+ * Users can then grant access to it based on their policies.
+ *
+ * Otherwise identical to pgstattuple_approx (above).
+ */
+Datum
+pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+
+ PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
+}
+
+Datum
+pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
+{
Relation rel;
output_type stat = {0};
TupleDesc tupdesc;
@@ -217,11 +251,6 @@ pgstattuple_approx(PG_FUNCTION_ARGS)
HeapTuple ret;
int i = 0;
- if (!superuser())
- ereport(ERROR,
- (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
- (errmsg("must be superuser to use pgstattuple functions"))));
-
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index 6084589e07..d9a722ac6b 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -54,6 +54,14 @@ PG_FUNCTION_INFO_V1(pg_relpages);
PG_FUNCTION_INFO_V1(pg_relpagesbyid);
PG_FUNCTION_INFO_V1(pgstatginindex);
+PG_FUNCTION_INFO_V1(pgstatindex_v1_5);
+PG_FUNCTION_INFO_V1(pgstatindexbyid_v1_5);
+PG_FUNCTION_INFO_V1(pg_relpages_v1_5);
+PG_FUNCTION_INFO_V1(pg_relpagesbyid_v1_5);
+PG_FUNCTION_INFO_V1(pgstatginindex_v1_5);
+
+Datum pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo);
+
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
#define IS_GIN(r) ((r)->rd_rel->relam == GIN_AM_OID)
@@ -99,6 +107,10 @@ static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo);
* pgstatindex()
*
* Usage: SELECT * FROM pgstatindex('t1_pkey');
+ *
+ * The superuser() check here must be kept as the library might be upgraded
+ * without the extension being upgraded, meaning that in pre-1.5 installations
+ * these functions could be called by any user.
* ------------------------------------------------------
*/
Datum
@@ -119,6 +131,31 @@ pgstatindex(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo));
}
+/*
+ * As of pgstattuple version 1.5, we no longer need to check if the user
+ * is a superuser because we REVOKE EXECUTE on the function from PUBLIC.
+ * Users can then grant access to it based on their policies.
+ *
+ * Otherwise identical to pgstatindex (above).
+ */
+Datum
+pgstatindex_v1_5(PG_FUNCTION_ARGS)
+{
+ text *relname = PG_GETARG_TEXT_P(0);
+ Relation rel;
+ RangeVar *relrv;
+
+ relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+ rel = relation_openrv(relrv, AccessShareLock);
+
+ PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo));
+}
+
+/*
+ * The superuser() check here must be kept as the library might be upgraded
+ * without the extension being upgraded, meaning that in pre-1.5 installations
+ * these functions could be called by any user.
+ */
Datum
pgstatindexbyid(PG_FUNCTION_ARGS)
{
@@ -135,6 +172,18 @@ pgstatindexbyid(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo));
}
+/* No need for superuser checks in v1.5, see above */
+Datum
+pgstatindexbyid_v1_5(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Relation rel;
+
+ rel = relation_open(relid, AccessShareLock);
+
+ PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo));
+}
+
static Datum
pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
{
@@ -292,6 +341,8 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
*
* Usage: SELECT pg_relpages('t1');
* SELECT pg_relpages('t1_pkey');
+ *
+ * Must keep superuser() check, see above.
* --------------------------------------------------------
*/
Datum
@@ -319,6 +370,28 @@ pg_relpages(PG_FUNCTION_ARGS)
PG_RETURN_INT64(relpages);
}
+/* No need for superuser checks in v1.5, see above */
+Datum
+pg_relpages_v1_5(PG_FUNCTION_ARGS)
+{
+ text *relname = PG_GETARG_TEXT_P(0);
+ int64 relpages;
+ Relation rel;
+ RangeVar *relrv;
+
+ relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+ rel = relation_openrv(relrv, AccessShareLock);
+
+ /* note: this will work OK on non-local temp tables */
+
+ relpages = RelationGetNumberOfBlocks(rel);
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_INT64(relpages);
+}
+
+/* Must keep superuser() check, see above. */
Datum
pg_relpagesbyid(PG_FUNCTION_ARGS)
{
@@ -342,16 +415,58 @@ pg_relpagesbyid(PG_FUNCTION_ARGS)
PG_RETURN_INT64(relpages);
}
+/* No need for superuser checks in v1.5, see above */
+Datum
+pg_relpagesbyid_v1_5(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 relpages;
+ Relation rel;
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /* note: this will work OK on non-local temp tables */
+
+ relpages = RelationGetNumberOfBlocks(rel);
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_INT64(relpages);
+}
+
/* ------------------------------------------------------
* pgstatginindex()
*
* Usage: SELECT * FROM pgstatginindex('ginindex');
+ *
+ * Must keep superuser() check, see above.
* ------------------------------------------------------
*/
Datum
pgstatginindex(PG_FUNCTION_ARGS)
{
Oid relid = PG_GETARG_OID(0);
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use pgstattuple functions"))));
+
+ PG_RETURN_DATUM(pgstatginindex_internal(relid, fcinfo));
+}
+
+/* No need for superuser checks in v1.5, see above */
+Datum
+pgstatginindex_v1_5(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+
+ PG_RETURN_DATUM(pgstatginindex_internal(relid, fcinfo));
+}
+
+Datum
+pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo)
+{
Relation rel;
Buffer buffer;
Page page;
@@ -363,11 +478,6 @@ pgstatginindex(PG_FUNCTION_ARGS)
bool nulls[3] = {false, false, false};
Datum result;
- if (!superuser())
- ereport(ERROR,
- (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
- (errmsg("must be superuser to use pgstattuple functions"))));
-
rel = relation_open(relid, AccessShareLock);
if (!IS_INDEX(rel) || !IS_GIN(rel))
@@ -415,5 +525,5 @@ pgstatginindex(PG_FUNCTION_ARGS)
tuple = heap_form_tuple(tupleDesc, values, nulls);
result = HeapTupleGetDatum(tuple);
- PG_RETURN_DATUM(result);
+ return (result);
}
diff --git a/contrib/pgstattuple/pgstattuple--1.4--1.5.sql b/contrib/pgstattuple/pgstattuple--1.4--1.5.sql
new file mode 100644
index 0000000000..65d7f19c2a
--- /dev/null
+++ b/contrib/pgstattuple/pgstattuple--1.4--1.5.sql
@@ -0,0 +1,111 @@
+/* contrib/pgstattuple/pgstattuple--1.4--1.5.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.5'" to load this file. \quit
+
+CREATE OR REPLACE FUNCTION pgstattuple(IN relname text,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT tuple_count BIGINT, -- number of live tuples
+ OUT tuple_len BIGINT, -- total tuples length in bytes
+ OUT tuple_percent FLOAT8, -- live tuples in %
+ OUT dead_tuple_count BIGINT, -- number of dead tuples
+ OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in %
+ OUT free_space BIGINT, -- free space in bytes
+ OUT free_percent FLOAT8) -- free space in %
+AS 'MODULE_PATHNAME', 'pgstattuple_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstattuple(text) FROM PUBLIC;
+
+CREATE OR REPLACE FUNCTION pgstatindex(IN relname text,
+ OUT version INT,
+ OUT tree_level INT,
+ OUT index_size BIGINT,
+ OUT root_block_no BIGINT,
+ OUT internal_pages BIGINT,
+ OUT leaf_pages BIGINT,
+ OUT empty_pages BIGINT,
+ OUT deleted_pages BIGINT,
+ OUT avg_leaf_density FLOAT8,
+ OUT leaf_fragmentation FLOAT8)
+AS 'MODULE_PATHNAME', 'pgstatindex_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstatindex(text) FROM PUBLIC;
+
+CREATE OR REPLACE FUNCTION pg_relpages(IN relname text)
+RETURNS BIGINT
+AS 'MODULE_PATHNAME', 'pg_relpages_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_relpages(text) FROM PUBLIC;
+
+/* New stuff in 1.1 begins here */
+
+CREATE OR REPLACE FUNCTION pgstatginindex(IN relname regclass,
+ OUT version INT4,
+ OUT pending_pages INT4,
+ OUT pending_tuples BIGINT)
+AS 'MODULE_PATHNAME', 'pgstatginindex_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstatginindex(regclass) FROM PUBLIC;
+
+/* New stuff in 1.2 begins here */
+
+CREATE OR REPLACE FUNCTION pgstattuple(IN reloid regclass,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT tuple_count BIGINT, -- number of live tuples
+ OUT tuple_len BIGINT, -- total tuples length in bytes
+ OUT tuple_percent FLOAT8, -- live tuples in %
+ OUT dead_tuple_count BIGINT, -- number of dead tuples
+ OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in %
+ OUT free_space BIGINT, -- free space in bytes
+ OUT free_percent FLOAT8) -- free space in %
+AS 'MODULE_PATHNAME', 'pgstattuplebyid_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstattuple(regclass) FROM PUBLIC;
+
+CREATE OR REPLACE FUNCTION pgstatindex(IN relname regclass,
+ OUT version INT,
+ OUT tree_level INT,
+ OUT index_size BIGINT,
+ OUT root_block_no BIGINT,
+ OUT internal_pages BIGINT,
+ OUT leaf_pages BIGINT,
+ OUT empty_pages BIGINT,
+ OUT deleted_pages BIGINT,
+ OUT avg_leaf_density FLOAT8,
+ OUT leaf_fragmentation FLOAT8)
+AS 'MODULE_PATHNAME', 'pgstatindexbyid_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstatindex(regclass) FROM PUBLIC;
+
+CREATE OR REPLACE FUNCTION pg_relpages(IN relname regclass)
+RETURNS BIGINT
+AS 'MODULE_PATHNAME', 'pg_relpagesbyid_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_relpages(regclass) FROM PUBLIC;
+
+/* New stuff in 1.3 begins here */
+
+CREATE OR REPLACE FUNCTION pgstattuple_approx(IN reloid regclass,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned
+ OUT approx_tuple_count BIGINT, -- estimated number of live tuples
+ OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples
+ OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate)
+ OUT dead_tuple_count BIGINT, -- exact number of dead tuples
+ OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate)
+ OUT approx_free_space BIGINT, -- estimated free space in bytes
+ OUT approx_free_percent FLOAT8) -- free space in % (based on estimate)
+AS 'MODULE_PATHNAME', 'pgstattuple_approx_v1_5'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstattuple_approx(regclass) FROM PUBLIC;
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index c1122b496a..68b07aaf26 100644
--- a/contrib/pgstattuple/pgstattuple.c
+++ b/contrib/pgstattuple/pgstattuple.c
@@ -40,7 +40,9 @@
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(pgstattuple);
+PG_FUNCTION_INFO_V1(pgstattuple_v1_5);
PG_FUNCTION_INFO_V1(pgstattuplebyid);
+PG_FUNCTION_INFO_V1(pgstattuplebyid_v1_5);
/*
* struct pgstattuple_type
@@ -152,6 +154,10 @@ build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo)
*
* C FUNCTION definition
* pgstattuple(text) returns pgstattuple_type
+ *
+ * The superuser() check here must be kept as the library might be upgraded
+ * without the extension being upgraded, meaning that in pre-1.5 installations
+ * these functions could be called by any user.
* ----------
*/
@@ -174,6 +180,28 @@ pgstattuple(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(pgstat_relation(rel, fcinfo));
}
+/*
+ * As of pgstattuple version 1.5, we no longer need to check if the user
+ * is a superuser because we REVOKE EXECUTE on the function from PUBLIC.
+ * Users can then grant access to it based on their policies.
+ *
+ * Otherwise identical to pgstattuple (above).
+ */
+Datum
+pgstattuple_v1_5(PG_FUNCTION_ARGS)
+{
+ text *relname = PG_GETARG_TEXT_P(0);
+ RangeVar *relrv;
+ Relation rel;
+
+ /* open relation */
+ relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+ rel = relation_openrv(relrv, AccessShareLock);
+
+ PG_RETURN_DATUM(pgstat_relation(rel, fcinfo));
+}
+
+/* Must keep superuser() check, see above. */
Datum
pgstattuplebyid(PG_FUNCTION_ARGS)
{
@@ -191,6 +219,19 @@ pgstattuplebyid(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(pgstat_relation(rel, fcinfo));
}
+/* Remove superuser() check for 1.5 version, see above */
+Datum
+pgstattuplebyid_v1_5(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Relation rel;
+
+ /* open relation */
+ rel = relation_open(relid, AccessShareLock);
+
+ PG_RETURN_DATUM(pgstat_relation(rel, fcinfo));
+}
+
/*
* pgstat_relation
*/
diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control
index fa328fd664..6af40757b2 100644
--- a/contrib/pgstattuple/pgstattuple.control
+++ b/contrib/pgstattuple/pgstattuple.control
@@ -1,5 +1,5 @@
# pgstattuple extension
comment = 'show tuple-level statistics'
-default_version = '1.4'
+default_version = '1.5'
module_pathname = '$libdir/pgstattuple'
relocatable = true
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 61340bedbc..9ada5d209a 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -12,6 +12,14 @@
obtain tuple-level statistics.
+
+ As these functions return detailed page-level information, only the superuser
+ has EXECUTE privileges on them upon installation. After the functions have
+ been installed, users may issue GRANT commands to change
+ the privileges on the functions to allow non-superusers to execute them. See
+ the description of the GRANT command for specifics.
+
+
Functions
--
cgit v1.2.3
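A minimal sketch of the workflow this patch enables (the role name is
illustrative):

    ALTER EXTENSION pgstattuple UPDATE TO '1.5';
    -- The update script revokes EXECUTE from PUBLIC, so access is now
    -- handed out through the GRANT system instead of a superuser() check:
    GRANT EXECUTE ON FUNCTION pgstattuple(regclass) TO stats_reader;
    GRANT EXECUTE ON FUNCTION pgstatindex(regclass) TO stats_reader;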
From 33596edf09516a7cab65914e16cfd6adf9fc55d1 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 1 Oct 2016 15:32:53 -0400
Subject: Copy-editing for contrib/pg_visibility documentation.
Add omitted names for some function parameters.
Fix some minor grammatical issues.
---
doc/src/sgml/pgvisibility.sgml | 87 +++++++++++++++++++++---------------------
1 file changed, 44 insertions(+), 43 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/pgvisibility.sgml b/doc/src/sgml/pgvisibility.sgml
index d764eff9a0..fd486696fc 100644
--- a/doc/src/sgml/pgvisibility.sgml
+++ b/doc/src/sgml/pgvisibility.sgml
@@ -9,31 +9,33 @@
The pg_visibility> module provides a means for examining the
- visibility map (VM) and page-level visibility information. It also
- provides functions to check the integrity of the visibility map and to
+ visibility map (VM) and page-level visibility information of a table.
+ It also provides functions to check the integrity of a visibility map and to
force it to be rebuilt.
Three different bits are used to store information about page-level
visibility. The all-visible bit in the visibility map indicates that every
- tuple on a given page of a relation is visible to every current transaction.
- The all-frozen bit in the visibility map indicates that every tuple on the
- page is frozen; that is, no future vacuum will need to modify the page
- until such time as a tuple is inserted, updated, deleted, or locked on
- that page. The page-level PD_ALL_VISIBLE bit has the
+ tuple in the corresponding page of the relation is visible to every current
+ and future transaction. The all-frozen bit in the visibility map indicates
+ that every tuple in the page is frozen; that is, no future vacuum will need
+ to modify the page until such time as a tuple is inserted, updated, deleted,
+ or locked on that page.
+ The page header's PD_ALL_VISIBLE bit has the
same meaning as the all-visible bit in the visibility map, but is stored
- within the data page itself rather than a separate data structure. These
- will normally agree, but the page-level bit can sometimes be set while the
- visibility map bit is clear after a crash recovery; or they can disagree
- because of a change which occurs after pg_visibility> examines
- the visibility map and before it examines the data page. Any event which
- causes data corruption can also cause these bits to disagree.
+ within the data page itself rather than in a separate data structure.
+ These two bits will normally agree, but the page's all-visible bit can
+ sometimes be set while the visibility map bit is clear after a crash
+ recovery. The reported values can also disagree because of a change that
+ occurs after pg_visibility> examines the visibility map and
+ before it examines the data page. Any event that causes data corruption
+ can also cause these bits to disagree.
- Functions which display information about PD_ALL_VISIBLE>
- are much more costly than those which only consult the visibility map,
+ Functions that display information about PD_ALL_VISIBLE> bits
+ are much more costly than those that only consult the visibility map,
because they must read the relation's data blocks rather than only the
(much smaller) visibility map. Functions that check the relation's
data blocks are similarly expensive.
@@ -44,7 +46,7 @@
- pg_visibility_map(regclass, blkno bigint, all_visible OUT boolean, all_frozen OUT boolean) returns record
+ pg_visibility_map(relation regclass, blkno bigint, all_visible OUT boolean, all_frozen OUT boolean) returns record
Returns the all-visible and all-frozen bits in the visibility map for
@@ -54,40 +56,40 @@
- pg_visibility(regclass, blkno bigint, all_visible OUT boolean, all_frozen OUT boolean, pd_all_visible OUT boolean) returns record
+ pg_visibility(relation regclass, blkno bigint, all_visible OUT boolean, all_frozen OUT boolean, pd_all_visible OUT boolean) returns record
Returns the all-visible and all-frozen bits in the visibility map for
the given block of the given relation, plus the
- PD_ALL_VISIBLE> bit for that block.
+ PD_ALL_VISIBLE> bit of that block.
- pg_visibility_map(regclass, blkno OUT bigint, all_visible OUT boolean, all_frozen OUT boolean) returns setof record
+ pg_visibility_map(relation regclass, blkno OUT bigint, all_visible OUT boolean, all_frozen OUT boolean) returns setof record
Returns the all-visible and all-frozen bits in the visibility map for
- each block the given relation.
+ each block of the given relation.
- pg_visibility(regclass, blkno OUT bigint, all_visible OUT boolean, all_frozen OUT boolean, pd_all_visible OUT boolean) returns setof record
+ pg_visibility(relation regclass, blkno OUT bigint, all_visible OUT boolean, all_frozen OUT boolean, pd_all_visible OUT boolean) returns setof record
Returns the all-visible and all-frozen bits in the visibility map for
- each block the given relation, plus the PD_ALL_VISIBLE>
- bit for each block.
+ each block of the given relation, plus the PD_ALL_VISIBLE>
+ bit of each block.
- pg_visibility_map_summary(regclass, all_visible OUT bigint, all_frozen OUT bigint) returns record
+ pg_visibility_map_summary(relation regclass, all_visible OUT bigint, all_frozen OUT bigint) returns record
@@ -96,50 +98,49 @@
-
+
- pg_check_frozen(regclass, t_ctid OUT tid) returns setof tid
+ pg_check_frozen(relation regclass, t_ctid OUT tid) returns setof tid
- Returns the TIDs of non-frozen tuples present in pages marked all-frozen
+ Returns the TIDs of non-frozen tuples stored in pages marked all-frozen
in the visibility map. If this function returns a non-empty set of
- TIDs, the database is corrupt.
+ TIDs, the visibility map is corrupt.
-
-
- pg_check_visible(regclass, t_ctid OUT tid) returns setof tid
+
+
+ pg_check_visible(relation regclass, t_ctid OUT tid) returns setof tid
- Returns the TIDs of tuples which are not all-visible despite the fact
- that the pages which contain them are marked as all-visible in the
- visibility map. If this function returns a non-empty set of TIDs, the
- database is corrupt.
+ Returns the TIDs of non-all-visible tuples stored in pages marked
+ all-visible in the visibility map. If this function returns a non-empty
+ set of TIDs, the visibility map is corrupt.
- pg_truncate_visibility_map(regclass) returns void
+ pg_truncate_visibility_map(relation regclass) returns void
- Truncates the visibility map for the given relation. This function
- is only expected to be useful if you suspect that the visibility map
- for the indicated relation is corrupt and wish to rebuild it. The first
- VACUUM> executed on the given relation after this function
- is executed will scan every page in the relation and rebuild the
- visibility map.
+ Truncates the visibility map for the given relation. This function is
+ useful if you believe that the visibility map for the relation is
+ corrupt and wish to force rebuilding it. The first VACUUM>
+ executed on the given relation after this function is executed will scan
+ every page in the relation and rebuild the visibility map. (Until that
+ is done, queries will treat the visibility map as containing all zeroes.)
- By default, these functions are not publicly executable.
+ By default, these functions are executable only by superusers.
--
cgit v1.2.3
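A brief usage sketch of the functions documented above (the table name is
illustrative):

    CREATE EXTENSION pg_visibility;
    SELECT * FROM pg_visibility_map('mytable', 0);       -- VM bits for block 0
    SELECT * FROM pg_visibility_map_summary('mytable');
    SELECT * FROM pg_check_frozen('mytable');  -- empty result: no corruption found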
From e8bdee2770ff52aab208bc6f8965a4a01979d0aa Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 2 Oct 2016 14:31:28 -0400
Subject: Add ALTER EXTENSION ADD/DROP ACCESS METHOD, and use it in pg_upgrade.
Without this, an extension containing an access method is not properly
dumped/restored during pg_upgrade --- the AM ends up not being a member
of the extension after upgrading.
Another oversight in commit 473b93287, reported by Andrew Dunstan.
Report:
---
doc/src/sgml/ref/alter_extension.sgml | 1 +
src/backend/parser/gram.y | 11 ++++++++++-
src/bin/pg_dump/pg_dump.c | 3 +++
3 files changed, 14 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_extension.sgml b/doc/src/sgml/ref/alter_extension.sgml
index 7141ee352e..de6d6dca16 100644
--- a/doc/src/sgml/ref/alter_extension.sgml
+++ b/doc/src/sgml/ref/alter_extension.sgml
@@ -30,6 +30,7 @@ ALTER EXTENSION name DROP where member_object is:
+ ACCESS METHOD object_name |
AGGREGATE aggregate_name ( aggregate_signature ) |
CAST (source_type AS target_type) |
COLLATION object_name |
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 1526c73a1c..5547fc8658 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -3931,7 +3931,16 @@ alter_extension_opt_item:
*****************************************************************************/
AlterExtensionContentsStmt:
- ALTER EXTENSION name add_drop AGGREGATE func_name aggr_args
+ ALTER EXTENSION name add_drop ACCESS METHOD name
+ {
+ AlterExtensionContentsStmt *n = makeNode(AlterExtensionContentsStmt);
+ n->extname = $3;
+ n->action = $4;
+ n->objtype = OBJECT_ACCESS_METHOD;
+ n->objname = list_make1(makeString($7));
+ $$ = (Node *)n;
+ }
+ | ALTER EXTENSION name add_drop AGGREGATE func_name aggr_args
{
AlterExtensionContentsStmt *n = makeNode(AlterExtensionContentsStmt);
n->extname = $3;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 51b8a1a622..299e88788e 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -12505,6 +12505,9 @@ dumpAccessMethod(Archive *fout, AccessMethodInfo *aminfo)
appendPQExpBuffer(labelq, "ACCESS METHOD %s",
qamname);
+ if (dopt->binary_upgrade)
+ binary_upgrade_extension_member(q, &aminfo->dobj, labelq->data);
+
if (aminfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
ArchiveEntry(fout, aminfo->dobj.catId, aminfo->dobj.dumpId,
aminfo->dobj.name,
--
cgit v1.2.3
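A sketch of the new syntax (the extension and access method names are
hypothetical):

    ALTER EXTENSION myextension ADD ACCESS METHOD myam;
    ALTER EXTENSION myextension DROP ACCESS METHOD myam;

pg_dump in binary-upgrade mode now emits the ADD form itself, which is what
keeps the access method's extension membership intact across pg_upgrade.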
From 6f3bd98ebfc008cbd676da777bb0b2376c4c4bfa Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 4 Oct 2016 10:50:13 -0400
Subject: Extend framework from commit 53be0b1ad to report latch waits.
WaitLatch, WaitLatchOrSocket, and WaitEventSetWait now take an
additional wait_event_info parameter; legal values are defined in
pgstat.h. This makes it possible to uniquely identify every point in
the core code where we are waiting for a latch; extensions can pass
WAIT_EXTENSION.
Because latches were the major wait primitive not previously covered
by this patch, it is now possible to see information in
pg_stat_activity on a large number of important wait events not
previously addressed, such as ClientRead, ClientWrite, and SyncRep.
Unfortunately, many of the wait events added by this patch will fail
to appear in pg_stat_activity because they're only used in background
processes which don't currently appear in pg_stat_activity. We should
fix this either by creating a separate view for such information, or
else by deciding to include them in pg_stat_activity after all.
Michael Paquier and Robert Haas, reviewed by Alexander Korotkov and
Thomas Munro.
---
contrib/postgres_fdw/connection.c | 3 +-
doc/src/sgml/monitoring.sgml | 169 ++++++++++++++++++++++++
src/backend/access/transam/parallel.c | 4 +-
src/backend/access/transam/xlog.c | 7 +-
src/backend/executor/nodeGather.c | 3 +-
src/backend/libpq/be-secure-openssl.c | 4 +-
src/backend/libpq/be-secure.c | 7 +-
src/backend/libpq/pqmq.c | 4 +-
src/backend/postmaster/autovacuum.c | 3 +-
src/backend/postmaster/bgworker.c | 7 +-
src/backend/postmaster/bgwriter.c | 5 +-
src/backend/postmaster/checkpointer.c | 3 +-
src/backend/postmaster/pgarch.c | 3 +-
src/backend/postmaster/pgstat.c | 236 ++++++++++++++++++++++++++++++++--
src/backend/postmaster/syslogger.c | 4 +-
src/backend/postmaster/walwriter.c | 3 +-
src/backend/replication/basebackup.c | 3 +-
src/backend/replication/syncrep.c | 4 +-
src/backend/replication/walreceiver.c | 7 +-
src/backend/replication/walsender.c | 9 +-
src/backend/storage/buffer/bufmgr.c | 7 +-
src/backend/storage/ipc/latch.c | 18 ++-
src/backend/storage/ipc/shm_mq.c | 7 +-
src/backend/storage/ipc/standby.c | 5 +-
src/backend/storage/lmgr/lock.c | 3 -
src/backend/storage/lmgr/lwlock.c | 6 +-
src/backend/storage/lmgr/predicate.c | 3 +-
src/backend/storage/lmgr/proc.c | 8 +-
src/backend/utils/adt/misc.c | 4 +-
src/include/pgstat.h | 99 ++++++++++++--
src/include/storage/latch.h | 9 +-
src/include/storage/lwlock.h | 2 +-
src/include/storage/proc.h | 2 +-
src/test/modules/test_shm_mq/setup.c | 3 +-
src/test/modules/test_shm_mq/test.c | 3 +-
35 files changed, 584 insertions(+), 83 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 8ca1c1c898..9badfe6a7d 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -17,6 +17,7 @@
#include "access/xact.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "storage/latch.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
@@ -496,7 +497,7 @@ pgfdw_get_result(PGconn *conn, const char *query)
wc = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_SOCKET_READABLE,
PQsocket(conn),
- -1L);
+ -1L, WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index f400785721..3de489e2f0 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -679,6 +679,42 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
buffer in question.
+
+
+ Activity>: The server process is idle. This is used by
+ system processes waiting for activity in their main processing loop.
+ wait_event> will identify the specific wait point.
+
+
+
+
+ Extension>: The server process is waiting for activity
+ in an extension module. This category is useful for modules to
+ track custom waiting points.
+
+
+
+
+ Client>: The server process is waiting for some activity
+ on a socket connected to a user application, and the server expects
+ something to happen that is independent of its internal processes.
+ wait_event> will identify the specific wait point.
+
+
+
+
+ IPC>: The server process is waiting for some activity
+ from another process in the server. wait_event> will
+ identify the specific wait point.
+
+
+
+
+ Timeout>: The server process is waiting for a timeout
+ to expire. wait_event> will identify the specific wait
+ point.
+
+
@@ -1085,6 +1121,139 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
BufferPin>Waiting to acquire a pin on a buffer.
+
+ Activity>
+ ArchiverMain>
+ Waiting in main loop of the archiver process.
+
+
+ AutoVacuumMain>
+ Waiting in main loop of autovacuum launcher process.
+
+
+ BgWriterHibernate>
+ Waiting in background writer process, hibernating.
+
+
+ BgWriterMain>
+ Waiting in main loop of background writer process.
+
+
+ CheckpointerMain>
+ Waiting in main loop of checkpointer process.
+
+
+ PgStatMain>
+ Waiting in main loop of the statistics collector process.
+
+
+ RecoveryWalAll>
+ Waiting for WAL from any kind of source (local, archive or stream) during recovery.
+
+
+ RecoveryWalStream>
+ Waiting for WAL from a stream during recovery.
+
+
+ SysLoggerMain>
+ Waiting in main loop of syslogger process.
+
+
+ WalReceiverMain>
+ Waiting in main loop of WAL receiver process.
+
+
+ WalSenderMain>
+ Waiting in main loop of WAL sender process.
+
+
+ WalWriterMain>
+ Waiting in main loop of WAL writer process.
+
+
+ Client>
+ ClientRead>
+ Waiting to read data from the client.
+
+
+ ClientWrite>
+ Waiting to write data to the client.
+
+
+ SSLOpenServer>
+ Waiting for SSL while attempting connection.
+
+
+ WalReceiverWaitStart>
+ Waiting for startup process to send initial data for streaming replication.
+
+
+ WalSenderWaitForWAL>
+ Waiting for WAL to be flushed in WAL sender process.
+
+
+ WalSenderWriteData>
+ Waiting for any activity when processing replies from WAL receiver in WAL sender process.
+
+
+ Extension>
+ Extension>
+ Waiting in an extension.
+
+
+ IPC>
+ BgWorkerShutdown>
+ Waiting for background worker to shut down.
+
+
+ BgWorkerStartup>
+ Waiting for background worker to start up.
+
+
+ ExecuteGather>
+ Waiting for activity from a child process while executing a Gather> node.
+
+
+ MessageQueueInternal>
+ Waiting for another process to attach to a shared message queue.
+
+
+ MessageQueuePutMessage>
+ Waiting to write a protocol message to a shared message queue.
+
+
+ MessageQueueReceive>
+ Waiting to receive bytes from a shared message queue.
+
+
+ MessageQueueSend>
+ Waiting to send bytes to a shared message queue.
+
+
+ ParallelFinish>
+ Waiting for parallel workers to finish computing.
+
+
+ SafeSnapshot>
+ Waiting for a snapshot for a READ ONLY DEFERRABLE> transaction.
+
+
+ SyncRep>
+ Waiting for confirmation from remote server during synchronous replication.
+
+
+ Timeout>
+ BaseBackupThrottle>
+ Waiting during base backup when throttling activity.
+
+
+ PgSleep>
+ Waiting in process that called pg_sleep>.
+
+
+ RecoveryApplyDelay>
+ Waiting to apply WAL during recovery because it is delayed.
+
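A query sketch showing how the new events surface once this patch is applied:

    -- The wait_event columns were added to pg_stat_activity by commit
    -- 53be0b1ad; this patch extends the set of events reported in them.
    SELECT pid, wait_event_type, wait_event
    FROM pg_stat_activity
    WHERE wait_event IS NOT NULL;
    -- e.g. an idle backend now shows ("Client", "ClientRead"), and a
    -- backend running pg_sleep() shows ("Timeout", "PgSleep").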
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index cde0ed300f..59dc3949d8 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -24,6 +24,7 @@
#include "libpq/pqmq.h"
#include "miscadmin.h"
#include "optimizer/planmain.h"
+#include "pgstat.h"
#include "storage/ipc.h"
#include "storage/sinval.h"
#include "storage/spin.h"
@@ -540,7 +541,8 @@ WaitForParallelWorkersToFinish(ParallelContext *pcxt)
if (!anyone_alive)
break;
- WaitLatch(&MyProc->procLatch, WL_LATCH_SET, -1);
+ WaitLatch(&MyProc->procLatch, WL_LATCH_SET, -1,
+ WAIT_EVENT_PARALLEL_FINISH);
ResetLatch(&MyProc->procLatch);
}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c1b9a97147..08c87f91be 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5827,7 +5827,8 @@ recoveryApplyDelay(XLogReaderState *record)
WaitLatch(&XLogCtl->recoveryWakeupLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- secs * 1000L + microsecs / 1000);
+ secs * 1000L + microsecs / 1000,
+ WAIT_EVENT_RECOVERY_APPLY_DELAY);
}
return true;
}
@@ -11387,7 +11388,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
WaitLatch(&XLogCtl->recoveryWakeupLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- wait_time);
+ wait_time, WAIT_EVENT_RECOVERY_WAL_STREAM);
ResetLatch(&XLogCtl->recoveryWakeupLatch);
now = GetCurrentTimestamp();
}
@@ -11550,7 +11551,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
*/
WaitLatch(&XLogCtl->recoveryWakeupLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- 5000L);
+ 5000L, WAIT_EVENT_RECOVERY_WAL_ALL);
ResetLatch(&XLogCtl->recoveryWakeupLatch);
break;
}
diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c
index 438d1b24fc..880ca62397 100644
--- a/src/backend/executor/nodeGather.c
+++ b/src/backend/executor/nodeGather.c
@@ -38,6 +38,7 @@
#include "executor/nodeSubplan.h"
#include "executor/tqueue.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -387,7 +388,7 @@ gather_readnext(GatherState *gatherstate)
return NULL;
/* Nothing to do except wait for developments. */
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, WAIT_EVENT_EXECUTE_GATHER);
ResetLatch(MyLatch);
nvisited = 0;
}
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index fedb02cd82..668f217bba 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -60,6 +60,7 @@
#include "libpq/libpq.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "storage/latch.h"
#include "tcop/tcopprot.h"
#include "utils/memutils.h"
@@ -419,7 +420,8 @@ aloop:
else
waitfor = WL_SOCKET_WRITEABLE;
- WaitLatchOrSocket(MyLatch, waitfor, port->sock, 0);
+ WaitLatchOrSocket(MyLatch, waitfor, port->sock, 0,
+ WAIT_EVENT_SSL_OPEN_SERVER);
goto aloop;
case SSL_ERROR_SYSCALL:
if (r < 0)
diff --git a/src/backend/libpq/be-secure.c b/src/backend/libpq/be-secure.c
index cdd07d577b..b267507de9 100644
--- a/src/backend/libpq/be-secure.c
+++ b/src/backend/libpq/be-secure.c
@@ -33,6 +33,7 @@
#include "libpq/libpq.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "tcop/tcopprot.h"
#include "utils/memutils.h"
#include "storage/ipc.h"
@@ -146,7 +147,8 @@ retry:
ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL);
- WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */ , &event, 1);
+ WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */ , &event, 1,
+ WAIT_EVENT_CLIENT_READ);
/*
* If the postmaster has died, it's not safe to continue running,
@@ -247,7 +249,8 @@ retry:
ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL);
- WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */ , &event, 1);
+ WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */ , &event, 1,
+ WAIT_EVENT_CLIENT_WRITE);
/* See comments in secure_read. */
if (event.events & WL_POSTMASTER_DEATH)
diff --git a/src/backend/libpq/pqmq.c b/src/backend/libpq/pqmq.c
index bfe66c6c44..f93ccae148 100644
--- a/src/backend/libpq/pqmq.c
+++ b/src/backend/libpq/pqmq.c
@@ -17,6 +17,7 @@
#include "libpq/pqformat.h"
#include "libpq/pqmq.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
@@ -171,7 +172,8 @@ mq_putmessage(char msgtype, const char *s, size_t len)
if (result != SHM_MQ_WOULD_BLOCK)
break;
- WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0);
+ WaitLatch(&MyProc->procLatch, WL_LATCH_SET, 0,
+ WAIT_EVENT_MQ_PUT_MESSAGE);
ResetLatch(&MyProc->procLatch);
CHECK_FOR_INTERRUPTS();
}
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 1a92ca1deb..e3a6911fba 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -598,7 +598,8 @@ AutoVacLauncherMain(int argc, char *argv[])
*/
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L));
+ (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
+ WAIT_EVENT_AUTOVACUUM_MAIN);
ResetLatch(MyLatch);
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 699c934240..028a9eed2d 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -18,6 +18,7 @@
#include "libpq/pqsignal.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/postmaster.h"
+#include "pgstat.h"
#include "storage/barrier.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
@@ -969,7 +970,8 @@ WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
break;
rc = WaitLatch(MyLatch,
- WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+ WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
+ WAIT_EVENT_BGWORKER_STARTUP);
if (rc & WL_POSTMASTER_DEATH)
{
@@ -1008,7 +1010,8 @@ WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
break;
rc = WaitLatch(&MyProc->procLatch,
- WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+ WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
+ WAIT_EVENT_BGWORKER_SHUTDOWN);
if (rc & WL_POSTMASTER_DEATH)
{
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 10020349a2..c3f33561da 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -345,7 +345,7 @@ BackgroundWriterMain(void)
*/
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- BgWriterDelay /* ms */ );
+ BgWriterDelay /* ms */, WAIT_EVENT_BGWRITER_MAIN);
/*
* If no latch event and BgBufferSync says nothing's happening, extend
@@ -372,7 +372,8 @@ BackgroundWriterMain(void)
/* Sleep ... */
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- BgWriterDelay * HIBERNATE_FACTOR);
+ BgWriterDelay * HIBERNATE_FACTOR,
+ WAIT_EVENT_BGWRITER_HIBERNATE);
/* Reset the notification request in case we timed out */
StrategyNotifyBgWriter(-1);
}
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index d702a4864d..397267c6b7 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -556,7 +556,8 @@ CheckpointerMain(void)
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- cur_timeout * 1000L /* convert to ms */ );
+ cur_timeout * 1000L /* convert to ms */,
+ WAIT_EVENT_CHECKPOINTER_MAIN);
/*
* Emergency bailout if postmaster has died. This is to avoid the
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 1aa6466d67..62783d9259 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -390,7 +390,8 @@ pgarch_MainLoop(void)
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- timeout * 1000L);
+ timeout * 1000L,
+ WAIT_EVENT_ARCHIVER_MAIN);
if (rc & WL_TIMEOUT)
wakened = true;
}
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 96578dcedb..8c9d06fdaa 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -276,6 +276,11 @@ static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
static void pgstat_setup_memcxt(void);
+static const char *pgstat_get_wait_activity(WaitEventActivity w);
+static const char *pgstat_get_wait_client(WaitEventClient w);
+static const char *pgstat_get_wait_ipc(WaitEventIPC w);
+static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
+
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
static void pgstat_send(void *msg, int len);
@@ -3131,15 +3136,14 @@ pgstat_read_current_status(void)
const char *
pgstat_get_wait_event_type(uint32 wait_event_info)
{
- uint8 classId;
+ uint32 classId;
const char *event_type;
/* report process as not waiting. */
if (wait_event_info == 0)
return NULL;
- wait_event_info = wait_event_info >> 24;
- classId = wait_event_info & 0XFF;
+ classId = wait_event_info & 0xFF000000;
switch (classId)
{
@@ -3155,6 +3159,21 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
case WAIT_BUFFER_PIN:
event_type = "BufferPin";
break;
+ case WAIT_ACTIVITY:
+ event_type = "Activity";
+ break;
+ case WAIT_CLIENT:
+ event_type = "Client";
+ break;
+ case WAIT_EXTENSION:
+ event_type = "Extension";
+ break;
+ case WAIT_IPC:
+ event_type = "IPC";
+ break;
+ case WAIT_TIMEOUT:
+ event_type = "Timeout";
+ break;
default:
event_type = "???";
break;
@@ -3172,7 +3191,7 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
const char *
pgstat_get_wait_event(uint32 wait_event_info)
{
- uint8 classId;
+ uint32 classId;
uint16 eventId;
const char *event_name;
@@ -3180,9 +3199,8 @@ pgstat_get_wait_event(uint32 wait_event_info)
if (wait_event_info == 0)
return NULL;
- eventId = wait_event_info & ((1 << 24) - 1);
- wait_event_info = wait_event_info >> 24;
- classId = wait_event_info & 0XFF;
+ classId = wait_event_info & 0xFF000000;
+ eventId = wait_event_info & 0x0000FFFF;
switch (classId)
{
@@ -3196,6 +3214,37 @@ pgstat_get_wait_event(uint32 wait_event_info)
case WAIT_BUFFER_PIN:
event_name = "BufferPin";
break;
+ case WAIT_ACTIVITY:
+ {
+ WaitEventActivity w = (WaitEventActivity) wait_event_info;
+
+ event_name = pgstat_get_wait_activity(w);
+ break;
+ }
+ case WAIT_CLIENT:
+ {
+ WaitEventClient w = (WaitEventClient) wait_event_info;
+
+ event_name = pgstat_get_wait_client(w);
+ break;
+ }
+ case WAIT_EXTENSION:
+ event_name = "Extension";
+ break;
+ case WAIT_IPC:
+ {
+ WaitEventIPC w = (WaitEventIPC) wait_event_info;
+
+ event_name = pgstat_get_wait_ipc(w);
+ break;
+ }
+ case WAIT_TIMEOUT:
+ {
+ WaitEventTimeout w = (WaitEventTimeout) wait_event_info;
+
+ event_name = pgstat_get_wait_timeout(w);
+ break;
+ }
default:
event_name = "unknown wait event";
break;
@@ -3204,6 +3253,175 @@ pgstat_get_wait_event(uint32 wait_event_info)
return event_name;
}
+/* ----------
+ * pgstat_get_wait_activity() -
+ *
+ * Convert WaitEventActivity to string.
+ * ----------
+ */
+static const char *
+pgstat_get_wait_activity(WaitEventActivity w)
+{
+ const char *event_name = "unknown wait event";
+
+ switch (w)
+ {
+ case WAIT_EVENT_ARCHIVER_MAIN:
+ event_name = "ArchiverMain";
+ break;
+ case WAIT_EVENT_AUTOVACUUM_MAIN:
+ event_name = "AutoVacuumMain";
+ break;
+ case WAIT_EVENT_BGWRITER_HIBERNATE:
+ event_name = "BgWriterHibernate";
+ break;
+ case WAIT_EVENT_BGWRITER_MAIN:
+ event_name = "BgWriterMain";
+ break;
+ case WAIT_EVENT_CHECKPOINTER_MAIN:
+ event_name = "CheckpointerMain";
+ break;
+ case WAIT_EVENT_PGSTAT_MAIN:
+ event_name = "PgStatMain";
+ break;
+ case WAIT_EVENT_RECOVERY_WAL_ALL:
+ event_name = "RecoveryWalAll";
+ break;
+ case WAIT_EVENT_RECOVERY_WAL_STREAM:
+ event_name = "RecoveryWalStream";
+ break;
+ case WAIT_EVENT_SYSLOGGER_MAIN:
+ event_name = "SysLoggerMain";
+ break;
+ case WAIT_EVENT_WAL_RECEIVER_MAIN:
+ event_name = "WalReceiverMain";
+ break;
+ case WAIT_EVENT_WAL_SENDER_MAIN:
+ event_name = "WalSenderMain";
+ break;
+ case WAIT_EVENT_WAL_WRITER_MAIN:
+ event_name = "WalWriterMain";
+ break;
+ /* no default case, so that compiler will warn */
+ }
+
+ return event_name;
+}
+
+/* ----------
+ * pgstat_get_wait_client() -
+ *
+ * Convert WaitEventClient to string.
+ * ----------
+ */
+static const char *
+pgstat_get_wait_client(WaitEventClient w)
+{
+ const char *event_name = "unknown wait event";
+
+ switch (w)
+ {
+ case WAIT_EVENT_CLIENT_READ:
+ event_name = "ClientRead";
+ break;
+ case WAIT_EVENT_CLIENT_WRITE:
+ event_name = "ClientWrite";
+ break;
+ case WAIT_EVENT_SSL_OPEN_SERVER:
+ event_name = "SSLOpenServer";
+ break;
+ case WAIT_EVENT_WAL_RECEIVER_WAIT_START:
+ event_name = "WalReceiverWaitStart";
+ break;
+ case WAIT_EVENT_WAL_SENDER_WAIT_WAL:
+ event_name = "WalSenderWaitForWAL";
+ break;
+ case WAIT_EVENT_WAL_SENDER_WRITE_DATA:
+ event_name = "WalSenderWriteData";
+ break;
+ /* no default case, so that compiler will warn */
+ }
+
+ return event_name;
+}
+
+/* ----------
+ * pgstat_get_wait_ipc() -
+ *
+ * Convert WaitEventIPC to string.
+ * ----------
+ */
+static const char *
+pgstat_get_wait_ipc(WaitEventIPC w)
+{
+ const char *event_name = "unknown wait event";
+
+ switch (w)
+ {
+ case WAIT_EVENT_BGWORKER_SHUTDOWN:
+ event_name = "BgWorkerShutdown";
+ break;
+ case WAIT_EVENT_BGWORKER_STARTUP:
+ event_name = "BgWorkerStartup";
+ break;
+ case WAIT_EVENT_EXECUTE_GATHER:
+ event_name = "ExecuteGather";
+ break;
+ case WAIT_EVENT_MQ_INTERNAL:
+ event_name = "MessageQueueInternal";
+ break;
+ case WAIT_EVENT_MQ_PUT_MESSAGE:
+ event_name = "MessageQueuePutMessage";
+ break;
+ case WAIT_EVENT_MQ_RECEIVE:
+ event_name = "MessageQueueReceive";
+ break;
+ case WAIT_EVENT_MQ_SEND:
+ event_name = "MessageQueueSend";
+ break;
+ case WAIT_EVENT_PARALLEL_FINISH:
+ event_name = "ParallelFinish";
+ break;
+ case WAIT_EVENT_SAFE_SNAPSHOT:
+ event_name = "SafeSnapshot";
+ break;
+ case WAIT_EVENT_SYNC_REP:
+ event_name = "SyncRep";
+ break;
+ /* no default case, so that compiler will warn */
+ }
+
+ return event_name;
+}
+
+/* ----------
+ * pgstat_get_wait_timeout() -
+ *
+ * Convert WaitEventTimeout to string.
+ * ----------
+ */
+static const char *
+pgstat_get_wait_timeout(WaitEventTimeout w)
+{
+ const char *event_name = "unknown wait event";
+
+ switch (w)
+ {
+ case WAIT_EVENT_BASE_BACKUP_THROTTLE:
+ event_name = "BaseBackupThrottle";
+ break;
+ case WAIT_EVENT_PG_SLEEP:
+ event_name = "PgSleep";
+ break;
+ case WAIT_EVENT_RECOVERY_APPLY_DELAY:
+ event_name = "RecoveryApplyDelay";
+ break;
+ /* no default case, so that compiler will warn */
+ }
+
+ return event_name;
+}
+
/* ----------
* pgstat_get_backend_current_activity() -
*
@@ -3684,8 +3902,8 @@ PgstatCollectorMain(int argc, char *argv[])
#ifndef WIN32
wr = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
- pgStatSock,
- -1L);
+ pgStatSock, -1L,
+ WAIT_EVENT_PGSTAT_MAIN);
#else
/*
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index e7e488a236..af7136760a 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -35,6 +35,7 @@
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
+#include "pgstat.h"
#include "pgtime.h"
#include "postmaster/fork_process.h"
#include "postmaster/postmaster.h"
@@ -424,7 +425,8 @@ SysLoggerMain(int argc, char *argv[])
rc = WaitLatchOrSocket(MyLatch,
WL_LATCH_SET | WL_SOCKET_READABLE | cur_flags,
syslogPipe[0],
- cur_timeout);
+ cur_timeout,
+ WAIT_EVENT_SYSLOGGER_MAIN);
if (rc & WL_SOCKET_READABLE)
{
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 11ec56aebb..67dcff63b1 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -290,7 +290,8 @@ WalWriterMain(void)
rc = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- cur_timeout);
+ cur_timeout,
+ WAIT_EVENT_WAL_WRITER_MAIN);
/*
* Emergency bailout if postmaster has died. This is to avoid the
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 1eabaef492..fa75930c9f 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1364,7 +1364,8 @@ throttle(size_t increment)
*/
wait_result = WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- (long) (sleep / 1000));
+ (long) (sleep / 1000),
+ WAIT_EVENT_BASE_BACKUP_THROTTLE);
if (wait_result & WL_LATCH_SET)
CHECK_FOR_INTERRUPTS();
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index b442d061ec..ac29f567c3 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -61,6 +61,7 @@
#include "access/xact.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "replication/walsender_private.h"
@@ -258,7 +259,8 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
* Wait on latch. Any condition that should wake us up will set the
* latch, so no need for timeout.
*/
- WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1);
+ WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1,
+ WAIT_EVENT_SYNC_REP);
}
/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 413ee3a5c1..eed6effeeb 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -55,6 +55,7 @@
#include "libpq/pqformat.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/ipc.h"
@@ -486,7 +487,8 @@ WalReceiverMain(void)
WL_POSTMASTER_DEATH | WL_SOCKET_READABLE |
WL_TIMEOUT | WL_LATCH_SET,
wait_fd,
- NAPTIME_PER_CYCLE);
+ NAPTIME_PER_CYCLE,
+ WAIT_EVENT_WAL_RECEIVER_MAIN);
if (rc & WL_LATCH_SET)
{
ResetLatch(&walrcv->latch);
@@ -685,7 +687,8 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
}
SpinLockRelease(&walrcv->mutex);
- WaitLatch(&walrcv->latch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0);
+ WaitLatch(&walrcv->latch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
+ WAIT_EVENT_WAL_RECEIVER_WAIT_START);
}
if (update_process_title)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index c7743da034..0f3ced250c 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1146,7 +1146,8 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
/* Sleep until something happens or we time out */
WaitLatchOrSocket(MyLatch, wakeEvents,
- MyProcPort->sock, sleeptime);
+ MyProcPort->sock, sleeptime,
+ WAIT_EVENT_WAL_SENDER_WRITE_DATA);
}
/* reactivate latch so WalSndLoop knows to continue */
@@ -1272,7 +1273,8 @@ WalSndWaitForWal(XLogRecPtr loc)
/* Sleep until something happens or we time out */
WaitLatchOrSocket(MyLatch, wakeEvents,
- MyProcPort->sock, sleeptime);
+ MyProcPort->sock, sleeptime,
+ WAIT_EVENT_WAL_SENDER_WAIT_WAL);
}
/* reactivate latch so WalSndLoop knows to continue */
@@ -1924,7 +1926,8 @@ WalSndLoop(WalSndSendDataCallback send_data)
/* Sleep until something happens or we time out */
WaitLatchOrSocket(MyLatch, wakeEvents,
- MyProcPort->sock, sleeptime);
+ MyProcPort->sock, sleeptime,
+ WAIT_EVENT_WAL_SENDER_MAIN);
}
}
return;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 90804a3c53..91dc24c301 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3635,9 +3635,6 @@ LockBufferForCleanup(Buffer buffer)
UnlockBufHdr(bufHdr, buf_state);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
- /* Report the wait */
- pgstat_report_wait_start(WAIT_BUFFER_PIN, 0);
-
/* Wait to be signaled by UnpinBuffer() */
if (InHotStandby)
{
@@ -3649,9 +3646,7 @@ LockBufferForCleanup(Buffer buffer)
SetStartupBufferPinWaitBufId(-1);
}
else
- ProcWaitForSignal();
-
- pgstat_report_wait_end();
+ ProcWaitForSignal(WAIT_BUFFER_PIN);
/*
* Remove flag marking us as waiter. Normally this will not be set
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 9def8a12d3..8488f944de 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -55,6 +55,7 @@
#endif
#include "miscadmin.h"
+#include "pgstat.h"
#include "portability/instr_time.h"
#include "postmaster/postmaster.h"
#include "storage/barrier.h"
@@ -297,9 +298,11 @@ DisownLatch(volatile Latch *latch)
* we return all of them in one call, but we will return at least one.
*/
int
-WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
+WaitLatch(volatile Latch *latch, int wakeEvents, long timeout,
+ uint32 wait_event_info)
{
- return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout);
+ return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout,
+ wait_event_info);
}
/*
@@ -316,7 +319,7 @@ WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
*/
int
WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
- long timeout)
+ long timeout, uint32 wait_event_info)
{
int ret = 0;
int rc;
@@ -344,7 +347,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
AddWaitEventToSet(set, ev, sock, NULL, NULL);
}
- rc = WaitEventSetWait(set, timeout, &event, 1);
+ rc = WaitEventSetWait(set, timeout, &event, 1, wait_event_info);
if (rc == 0)
ret |= WL_TIMEOUT;
@@ -863,7 +866,8 @@ WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event)
*/
int
WaitEventSetWait(WaitEventSet *set, long timeout,
- WaitEvent *occurred_events, int nevents)
+ WaitEvent *occurred_events, int nevents,
+ uint32 wait_event_info)
{
int returned_events = 0;
instr_time start_time;
@@ -883,6 +887,8 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
cur_timeout = timeout;
}
+ pgstat_report_wait_start(wait_event_info);
+
#ifndef WIN32
waiting = true;
#else
@@ -960,6 +966,8 @@ WaitEventSetWait(WaitEventSet *set, long timeout,
waiting = false;
#endif
+ pgstat_report_wait_end();
+
return returned_events;
}
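
[Editor's note] The latch.c hunks above change the API contract: every WaitLatch, WaitLatchOrSocket, and WaitEventSetWait caller now passes a wait_event_info value, and the wait is reported centrally inside WaitEventSetWait instead of at each call site. A minimal caller sketch under the post-patch headers follows; the loop body, timeout, and choice of event are illustrative, not taken from the patch.

    /* Illustrative sketch of the post-patch calling convention. */
    #include "miscadmin.h"
    #include "pgstat.h"
    #include "storage/ipc.h"
    #include "storage/latch.h"

    static void
    example_wait_loop(void)
    {
        for (;;)
        {
            int rc = WaitLatch(MyLatch,
                               WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
                               1000L,   /* timeout in ms */
                               WAIT_EVENT_BGWRITER_MAIN);

            if (rc & WL_POSTMASTER_DEATH)
                proc_exit(1);
            ResetLatch(MyLatch);
            /* ... perform the loop's real work here ... */
        }
    }
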
diff --git a/src/backend/storage/ipc/shm_mq.c b/src/backend/storage/ipc/shm_mq.c
index 5b32782022..bfb67038ad 100644
--- a/src/backend/storage/ipc/shm_mq.c
+++ b/src/backend/storage/ipc/shm_mq.c
@@ -19,6 +19,7 @@
#include "postgres.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "storage/procsignal.h"
#include "storage/shm_mq.h"
@@ -894,7 +895,7 @@ shm_mq_send_bytes(shm_mq_handle *mqh, Size nbytes, const void *data,
* at top of loop, because setting an already-set latch is much
* cheaper than setting one that has been reset.
*/
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, WAIT_EVENT_MQ_SEND);
/* Reset the latch so we don't spin. */
ResetLatch(MyLatch);
@@ -991,7 +992,7 @@ shm_mq_receive_bytes(shm_mq *mq, Size bytes_needed, bool nowait,
* loop, because setting an already-set latch is much cheaper than
* setting one that has been reset.
*/
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, WAIT_EVENT_MQ_RECEIVE);
/* Reset the latch so we don't spin. */
ResetLatch(MyLatch);
@@ -1090,7 +1091,7 @@ shm_mq_wait_internal(volatile shm_mq *mq, PGPROC *volatile * ptr,
}
/* Wait to be signalled. */
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, WAIT_EVENT_MQ_INTERNAL);
/* Reset the latch so we don't spin. */
ResetLatch(MyLatch);
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 547f1a88fe..fb887b3230 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -22,6 +22,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
@@ -389,7 +390,7 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
}
/* Wait to be signaled by the release of the Relation Lock */
- ProcWaitForSignal();
+ ProcWaitForSignal(WAIT_LOCK | locktag.locktag_type);
/*
* Clear any timeout requests established above. We assume here that the
@@ -469,7 +470,7 @@ ResolveRecoveryConflictWithBufferPin(void)
}
/* Wait to be signaled by UnpinBuffer() */
- ProcWaitForSignal();
+ ProcWaitForSignal(WAIT_BUFFER_PIN);
/*
* Clear any timeout requests established above. We assume here that the
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index dba3809e74..71a4dd4736 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -1676,7 +1676,6 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
set_ps_display(new_status, false);
new_status[len] = '\0'; /* truncate off " waiting" */
}
- pgstat_report_wait_start(WAIT_LOCK, locallock->tag.lock.locktag_type);
awaitedLock = locallock;
awaitedOwner = owner;
@@ -1724,7 +1723,6 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
/* In this path, awaitedLock remains set until LockErrorCleanup */
/* Report change to non-waiting status */
- pgstat_report_wait_end();
if (update_process_title)
{
set_ps_display(new_status, false);
@@ -1739,7 +1737,6 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
awaitedLock = NULL;
/* Report change to non-waiting status */
- pgstat_report_wait_end();
if (update_process_title)
{
set_ps_display(new_status, false);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 9d08de75ae..a90b54ac86 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -732,9 +732,9 @@ LWLockReportWaitStart(LWLock *lock)
int lockId = T_ID(lock);
if (lock->tranche == 0)
- pgstat_report_wait_start(WAIT_LWLOCK_NAMED, (uint16) lockId);
+ pgstat_report_wait_start(WAIT_LWLOCK_NAMED | (uint16) lockId);
else
- pgstat_report_wait_start(WAIT_LWLOCK_TRANCHE, lock->tranche);
+ pgstat_report_wait_start(WAIT_LWLOCK_TRANCHE | lock->tranche);
}
/*
@@ -750,7 +750,7 @@ LWLockReportWaitEnd(void)
* Return an identifier for an LWLock based on the wait class and event.
*/
const char *
-GetLWLockIdentifier(uint8 classId, uint16 eventId)
+GetLWLockIdentifier(uint32 classId, uint16 eventId)
{
if (classId == WAIT_LWLOCK_NAMED)
return MainLWLockNames[eventId];
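
[Editor's note] The lwlock.c hunk above can replace the old two-argument pgstat_report_wait_start(class, event) with a single OR because each WAIT_* class constant (see the pgstat.h hunk later in this patch) occupies only the top byte, leaving the low 24 bits zero. A standalone sanity check, assuming the constant value from this patch:

    /* OR-ing the event id into the class word matches the old
     * (classId << 24) | eventId packing. */
    #include <assert.h>

    #define WAIT_LWLOCK_NAMED 0x01000000U

    int
    main(void)
    {
        unsigned short eventId = 42;    /* arbitrary example id */
        unsigned int   old_style = (0x01U << 24) | eventId;
        unsigned int   new_style = WAIT_LWLOCK_NAMED | eventId;

        assert(old_style == new_style);
        return 0;
    }
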
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index 4064b2033c..24ed21b487 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -192,6 +192,7 @@
#include "access/xact.h"
#include "access/xlog.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "storage/predicate_internals.h"
@@ -1518,7 +1519,7 @@ GetSafeSnapshot(Snapshot origSnapshot)
SxactIsROUnsafe(MySerializableXact)))
{
LWLockRelease(SerializableXactHashLock);
- ProcWaitForSignal();
+ ProcWaitForSignal(WAIT_EVENT_SAFE_SNAPSHOT);
LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
}
MySerializableXact->flags &= ~SXACT_FLAG_DEFERRABLE_WAITING;
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 33e7023656..dd76094bcd 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -39,6 +39,7 @@
#include "access/twophase.h"
#include "access/xact.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "replication/slot.h"
#include "replication/syncrep.h"
@@ -1212,7 +1213,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
}
else
{
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0,
+ WAIT_LOCK | locallock->tag.lock.locktag_type);
ResetLatch(MyLatch);
/* check for deadlocks first, as that's probably log-worthy */
if (got_deadlock_timeout)
@@ -1722,9 +1724,9 @@ CheckDeadLockAlert(void)
* wait again if not.
*/
void
-ProcWaitForSignal(void)
+ProcWaitForSignal(uint32 wait_event_info)
{
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, wait_event_info);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
}
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index 5e705e9308..0da051a2f2 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -29,6 +29,7 @@
#include "common/keywords.h"
#include "funcapi.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "parser/scansup.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
@@ -560,7 +561,8 @@ pg_sleep(PG_FUNCTION_ARGS)
(void) WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT,
- delay_ms);
+ delay_ms,
+ WAIT_EVENT_PG_SLEEP);
ResetLatch(MyLatch);
}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 0c98c59e72..b530c01984 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -715,15 +715,91 @@ typedef enum BackendState
* Wait Classes
* ----------
*/
-typedef enum WaitClass
+#define WAIT_LWLOCK_NAMED 0x01000000U
+#define WAIT_LWLOCK_TRANCHE 0x02000000U
+#define WAIT_LOCK 0x03000000U
+#define WAIT_BUFFER_PIN 0x04000000U
+#define WAIT_ACTIVITY 0x05000000U
+#define WAIT_CLIENT 0x06000000U
+#define WAIT_EXTENSION 0x07000000U
+#define WAIT_IPC 0x08000000U
+#define WAIT_TIMEOUT 0x09000000U
+
+/* ----------
+ * Wait Events - Activity
+ *
+ * Use this category when a process is waiting because it has no work to do,
+ * unless the "Client" or "Timeout" category describes the situation better.
+ * Typically, this should only be used for background processes.
+ * ----------
+ */
+typedef enum
{
- WAIT_UNDEFINED,
- WAIT_LWLOCK_NAMED,
- WAIT_LWLOCK_TRANCHE,
- WAIT_LOCK,
- WAIT_BUFFER_PIN
-} WaitClass;
+ WAIT_EVENT_ARCHIVER_MAIN = WAIT_ACTIVITY,
+ WAIT_EVENT_AUTOVACUUM_MAIN,
+ WAIT_EVENT_BGWRITER_HIBERNATE,
+ WAIT_EVENT_BGWRITER_MAIN,
+ WAIT_EVENT_CHECKPOINTER_MAIN,
+ WAIT_EVENT_PGSTAT_MAIN,
+ WAIT_EVENT_RECOVERY_WAL_ALL,
+ WAIT_EVENT_RECOVERY_WAL_STREAM,
+ WAIT_EVENT_SYSLOGGER_MAIN,
+ WAIT_EVENT_WAL_RECEIVER_MAIN,
+ WAIT_EVENT_WAL_SENDER_MAIN,
+ WAIT_EVENT_WAL_WRITER_MAIN,
+} WaitEventActivity;
+/* ----------
+ * Wait Events - Client
+ *
+ * Use this category when a process is waiting to send data to or receive data
+ * from the frontend process to which it is connected. This is never used for
+ * a background process, which has no client connection.
+ * ----------
+ */
+typedef enum
+{
+ WAIT_EVENT_CLIENT_READ = WAIT_CLIENT,
+ WAIT_EVENT_CLIENT_WRITE,
+ WAIT_EVENT_SSL_OPEN_SERVER,
+ WAIT_EVENT_WAL_RECEIVER_WAIT_START,
+ WAIT_EVENT_WAL_SENDER_WAIT_WAL,
+ WAIT_EVENT_WAL_SENDER_WRITE_DATA,
+} WaitEventClient;
+
+/* ----------
+ * Wait Events - IPC
+ *
+ * Use this category when a process cannot complete the work it is doing because
+ * it is waiting for a notification from another process.
+ * ----------
+ */
+typedef enum
+{
+ WAIT_EVENT_BGWORKER_SHUTDOWN = WAIT_IPC,
+ WAIT_EVENT_BGWORKER_STARTUP,
+ WAIT_EVENT_EXECUTE_GATHER,
+ WAIT_EVENT_MQ_INTERNAL,
+ WAIT_EVENT_MQ_PUT_MESSAGE,
+ WAIT_EVENT_MQ_RECEIVE,
+ WAIT_EVENT_MQ_SEND,
+ WAIT_EVENT_PARALLEL_FINISH,
+ WAIT_EVENT_SAFE_SNAPSHOT,
+ WAIT_EVENT_SYNC_REP
+} WaitEventIPC;
+
+/* ----------
+ * Wait Events - Timeout
+ *
+ * Use this category when a process is waiting for a timeout to expire.
+ * ----------
+ */
+typedef enum
+{
+ WAIT_EVENT_BASE_BACKUP_THROTTLE = WAIT_TIMEOUT,
+ WAIT_EVENT_PG_SLEEP,
+ WAIT_EVENT_RECOVERY_APPLY_DELAY
+} WaitEventTimeout;
/* ----------
* Command type for progress reporting purposes
@@ -1018,23 +1094,18 @@ extern void pgstat_initstats(Relation rel);
* ----------
*/
static inline void
-pgstat_report_wait_start(uint8 classId, uint16 eventId)
+pgstat_report_wait_start(uint32 wait_event_info)
{
volatile PGPROC *proc = MyProc;
- uint32 wait_event_val;
if (!pgstat_track_activities || !proc)
return;
- wait_event_val = classId;
- wait_event_val <<= 24;
- wait_event_val |= eventId;
-
/*
* Since this is a four-byte field which is always read and written as
* four-bytes, updates are atomic.
*/
- proc->wait_event_info = wait_event_val;
+ proc->wait_event_info = wait_event_info;
}
/* ----------
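
[Editor's note] The encoding defined in the pgstat.h hunk above stores the wait class in the top byte of wait_event_info and the event number in the low 16 bits, which is why pgstat_get_wait_event_type and pgstat_get_wait_event earlier in this patch recover both with simple masks. A worked decomposition, using the constants from this hunk:

    /* How a packed wait_event_info decomposes under the masks used in
     * pgstat.c; WAIT_TIMEOUT is the class constant defined above. */
    #include <stdio.h>

    #define WAIT_TIMEOUT 0x09000000U

    int
    main(void)
    {
        unsigned int info = WAIT_TIMEOUT + 1;        /* WAIT_EVENT_PG_SLEEP */
        unsigned int classId = info & 0xFF000000U;   /* 0x09000000 -> "Timeout" */
        unsigned int eventId = info & 0x0000FFFFU;   /* 0x0001 */

        printf("class=0x%08X event=0x%04X\n", classId, eventId);
        return 0;
    }
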
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h
index 5179ecc0db..e96e88f2fa 100644
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -155,10 +155,13 @@ extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd,
Latch *latch, void *user_data);
extern void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch);
-extern int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents);
-extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
+extern int WaitEventSetWait(WaitEventSet *set, long timeout,
+ WaitEvent *occurred_events, int nevents,
+ uint32 wait_event_info);
+extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout,
+ uint32 wait_event_info);
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
- pgsocket sock, long timeout);
+ pgsocket sock, long timeout, uint32 wait_event_info);
/*
* Unix implementation uses SIGUSR1 for inter-process signaling.
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 18931eb046..9a2d86975c 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -184,7 +184,7 @@ extern Size LWLockShmemSize(void);
extern void CreateLWLocks(void);
extern void InitLWLockAccess(void);
-extern const char *GetLWLockIdentifier(uint8 classId, uint16 eventId);
+extern const char *GetLWLockIdentifier(uint32 classId, uint16 eventId);
/*
* Extensions (or core code) can obtain an LWLocks by calling
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index f576f052df..7dc8dac6d1 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -291,7 +291,7 @@ extern void CheckDeadLockAlert(void);
extern bool IsWaitingForLock(void);
extern void LockErrorCleanup(void);
-extern void ProcWaitForSignal(void);
+extern void ProcWaitForSignal(uint32 wait_event_info);
extern void ProcSendSignal(int pid);
extern void BecomeLockGroupLeader(void);
diff --git a/src/test/modules/test_shm_mq/setup.c b/src/test/modules/test_shm_mq/setup.c
index 143df4eb65..cb86f3c37d 100644
--- a/src/test/modules/test_shm_mq/setup.c
+++ b/src/test/modules/test_shm_mq/setup.c
@@ -16,6 +16,7 @@
#include "postgres.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/bgworker.h"
#include "storage/procsignal.h"
#include "storage/shm_toc.h"
@@ -279,7 +280,7 @@ wait_for_workers_to_become_ready(worker_state *wstate,
}
/* Wait to be signalled. */
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, WAIT_EXTENSION);
/* Reset the latch so we don't spin. */
ResetLatch(MyLatch);
diff --git a/src/test/modules/test_shm_mq/test.c b/src/test/modules/test_shm_mq/test.c
index dd34bc7e7f..bf11137a96 100644
--- a/src/test/modules/test_shm_mq/test.c
+++ b/src/test/modules/test_shm_mq/test.c
@@ -15,6 +15,7 @@
#include "fmgr.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "test_shm_mq.h"
@@ -230,7 +231,7 @@ test_shm_mq_pipelined(PG_FUNCTION_ARGS)
* have read or written data and therefore there may now be work
* for us to do.
*/
- WaitLatch(MyLatch, WL_LATCH_SET, 0);
+ WaitLatch(MyLatch, WL_LATCH_SET, 0, WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
}
--
cgit v1.2.3
From 3d21f08bccd316c3850a1943c1ee1e381dab1588 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 10 Oct 2016 15:11:33 -0400
Subject: Update user docs for switch to POSIX semaphores.
Since commit ecb0d20a9 hasn't crashed and burned, here's the promised
docs update for it.
In addition to explaining that Linux and FreeBSD ports now use POSIX
semaphores, I did some wordsmithing on pre-existing wording; in
particular trying to clarify which SysV parameters need to be set with
an eye to total usage across all applications.
---
doc/src/sgml/runtime.sgml | 83 ++++++++++++++++++++++++++++-------------------
1 file changed, 50 insertions(+), 33 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 88ec120841..a8efb3d006 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -605,27 +605,47 @@ psql: could not connect to server: No such file or directory
- Shared memory and semaphores are collectively referred to as
- System V>
- IPC> (together with message queues, which are not
- relevant for PostgreSQL>). Except on
- Windows>, where PostgreSQL>
- provides its own replacement implementation of these facilities, these
- facilities are required in order to run
- PostgreSQL>.
+ PostgreSQL> requires the operating system to provide
+ inter-process communication (IPC>) features, specifically
+ shared memory and semaphores. Unix-derived systems typically provide
+ System V>> IPC>,
+ POSIX>> IPC>, or both.
+ Windows> has its own implementation of
+ these features and is not discussed here.
The complete lack of these facilities is usually manifested by an
- Illegal system call> error upon server start. In
- that case there is no alternative but to reconfigure your
+ Illegal system call> error upon server
+ start. In that case there is no alternative but to reconfigure your
kernel. PostgreSQL> won't work without them.
This situation is rare, however, among modern operating systems.
- When PostgreSQL> exceeds one of the various hard
- IPC> limits, the server will refuse to start and
+ Upon starting the server, PostgreSQL> normally allocates
+ a very small amount of System V shared memory, as well as a much larger
+ amount of POSIX (mmap>) shared memory.
+ In addition a significant number of semaphores, which can be either
+ System V or POSIX style, are created at server startup. Currently,
+ POSIX semaphores are used on Linux and FreeBSD systems while other
+ platforms use System V semaphores.
+
+
+
+
+ Prior to PostgreSQL> 9.3, only System V shared memory
+ was used, so the amount of System V shared memory required to start the
+ server was much larger. If you are running an older version of the
+ server, please consult the documentation for your server version.
+
+
+
+
+ System V IPC> features are typically constrained by
+ system-wide allocation limits.
+ When PostgreSQL> exceeds one of these limits,
+ the server will refuse to start and
should leave an instructive error message describing the problem
and what to do about it. (See also .) The relevant kernel
@@ -634,15 +654,6 @@ psql: could not connect to server: No such file or directory
them, however, vary. Suggestions for some platforms are given below.
-
-
- Prior to PostgreSQL> 9.3, the amount of System V shared
- memory required to start the server was much larger. If you are running
- an older version of the server, please consult the documentation for
- your server version.
-
-
-
System V> IPC> Parameters
@@ -651,7 +662,7 @@ psql: could not connect to server: No such file or directory
Name>
Description>
- Reasonable values>
+ Values needed to run one PostgreSQL> instance>
@@ -659,7 +670,7 @@ psql: could not connect to server: No such file or directory
SHMMAX>>
Maximum size of shared memory segment (bytes)>
- at least 1kB (more if running many copies of the server)
+ at least 1kB, but the default is usually much higher
@@ -671,7 +682,9 @@ psql: could not connect to server: No such file or directory
SHMALL>>
Total amount of shared memory available (bytes or pages)>
- if bytes, same as SHMMAX; if pages, ceil(SHMMAX/PAGE_SIZE)>
+ same as SHMMAX if bytes,
+ or ceil(SHMMAX/PAGE_SIZE) if pages,
+ plus room for other applications>
@@ -689,7 +702,7 @@ psql: could not connect to server: No such file or directory
SEMMNI>>
Maximum number of semaphore identifiers (i.e., sets)>
- at least ceil((max_connections + autovacuum_max_workers + max_worker_processes + 5) / 16)>
+ at least ceil((max_connections + autovacuum_max_workers + max_worker_processes + 5) / 16) plus room for other applications>
@@ -725,9 +738,8 @@ psql: could not connect to server: No such file or directory
(typically 48 bytes, on 64-bit platforms) for each copy of the server.
On most modern operating systems, this amount can easily be allocated.
However, if you are running many copies of the server, or if other
- applications are also using System V shared memory, it may be necessary
- to increase SHMMAX>, the maximum size in bytes of a shared
- memory segment, or SHMALL>, the total amount of System V shared
+ applications are also using System V shared memory, it may be necessary to
+ increase SHMALL>, which is the total amount of System V shared
memory system-wide. Note that SHMALL> is measured in pages
rather than bytes on many systems.
@@ -742,6 +754,7 @@ psql: could not connect to server: No such file or directory
+ When using System V semaphores,
PostgreSQL> uses one semaphore per allowed connection
(), allowed autovacuum worker process
() and allowed background
@@ -779,15 +792,19 @@ psql: could not connect to server: No such file or directory
- The SEMMSL> parameter, which determines how many
- semaphores can be in a set, must be at least 17 for
+ Various other settings related to semaphore undo>, such as
+ SEMMNU> and SEMUME>, do not affect
PostgreSQL>.
- Various other settings related to semaphore undo>, such as
- SEMMNU> and SEMUME>, do not affect
- PostgreSQL>.
+ When using POSIX semaphores, the number of semaphores needed is the
+ same as for System V, that is one semaphore per allowed connection
+ (), allowed autovacuum worker process
+ () and allowed background
+ process ().
+ On the platforms where this option is preferred, there is no specific
+ kernel limit on the number of POSIX semaphores.
--
cgit v1.2.3
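
[Editor's note] To make the SEMMNI formula in the table above concrete, here is a worked example; the three settings are assumptions chosen for illustration, not recommended values. PostgreSQL allocates its semaphores in sets of 16.

    /* Worked example for the SEMMNI row above. */
    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        int max_connections = 100;          /* assumed setting */
        int autovacuum_max_workers = 3;     /* assumed setting */
        int max_worker_processes = 8;       /* assumed setting */
        int sems = max_connections + autovacuum_max_workers +
                   max_worker_processes + 5;            /* 116 semaphores */

        /* ceil(116 / 16) = 8 semaphore sets, plus whatever other
         * applications on the machine need */
        printf("SEMMNI >= %d\n", (int) ceil(sems / 16.0));
        return 0;
    }
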
From e34318725ca5b274efd6f57ea7460e89f4dca9f9 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 11 Oct 2016 10:08:45 -0400
Subject: Improve documentation for CREATE RECURSIVE VIEW.
It was perhaps not entirely clear that internal self-references shouldn't
be schema-qualified even if the view name is written with a schema.
Spell it out.
Discussion: <871sznz69m.fsf@metapensiero.it>
---
doc/src/sgml/ref/create_view.sgml | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/create_view.sgml b/doc/src/sgml/ref/create_view.sgml
index ede1698051..8641e1925e 100644
--- a/doc/src/sgml/ref/create_view.sgml
+++ b/doc/src/sgml/ref/create_view.sgml
@@ -87,13 +87,13 @@ CREATE [ OR REPLACE ] [ TEMP | TEMPORARY ] [ RECURSIVE ] VIEW
Creates a recursive view. The syntax
-CREATE RECURSIVE VIEW name> (columns>) AS SELECT ...>;
+CREATE RECURSIVE VIEW [ schema> . ] view_name> (column_names>) AS SELECT ...>;
is equivalent to
-CREATE VIEW name> AS WITH RECURSIVE name> (columns>) AS (SELECT ...>) SELECT columns> FROM name>;
+CREATE VIEW [ schema> . ] view_name> AS WITH RECURSIVE view_name> (column_names>) AS (SELECT ...>) SELECT column_names> FROM view_name>;
- A view column list must be specified for a recursive view.
+ A view column name list must be specified for a recursive view.
@@ -462,11 +462,16 @@ CREATE VIEW comedies AS
Create a recursive view consisting of the numbers from 1 to 100:
-CREATE RECURSIVE VIEW nums_1_100 (n) AS
+CREATE RECURSIVE VIEW public.nums_1_100 (n) AS
VALUES (1)
UNION ALL
SELECT n+1 FROM nums_1_100 WHERE n < 100;
-
+
+ Notice that although the recursive view's name is schema-qualified in this
+ CREATE>, its internal self-reference is not schema-qualified.
+ This is because the implicitly-created CTE's name cannot be
+ schema-qualified.
+
--
cgit v1.2.3
From c7e56811fa38cbc39efd6bdd4bb45f2f0444803e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 11 Oct 2016 10:33:59 -0400
Subject: Docs: grammatical fix.
Fix poor grammar introduced in 741ccd501.
---
doc/src/sgml/client-auth.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml
index a0d97ffbac..960f5b5871 100644
--- a/doc/src/sgml/client-auth.sgml
+++ b/doc/src/sgml/client-auth.sgml
@@ -711,7 +711,7 @@ local db1,db2,@demodbs all md5
When using an external authentication system such as Ident or GSSAPI,
the name of the operating system user that initiated the connection
- might not be the same as the database user that is to be connect as.
+ might not be the same as the database user (role) that is to be used.
In this case, a user name map can be applied to map the operating system
user name to a database user. To use user name mapping, specify
map=map-name
--
cgit v1.2.3
From 2b860f52ed1b1784cdf3f03886805f5bf250ea74 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 11 Oct 2016 11:26:04 -0400
Subject: Remove "sco" and "unixware" ports.
SCO OpenServer and SCO UnixWare are more or less dead platforms.
We have never had a buildfarm member testing the "sco" port, and
the last "unixware" member was last heard from in 2012, so it's
fair to doubt that the code even compiles anymore on either one.
Remove both ports. We can always undo this if someone shows up
with an interest in maintaining and testing these platforms.
Discussion: <17177.1476136994@sss.pgh.pa.us>
---
configure | 37 --------
configure.in | 14 ---
doc/src/sgml/Makefile | 6 +-
doc/src/sgml/dfunc.sgml | 26 ------
doc/src/sgml/installation.sgml | 164 ++--------------------------------
doc/src/sgml/runtime.sgml | 58 ------------
src/Makefile.global.in | 1 -
src/Makefile.shlib | 24 -----
src/backend/libpq/pqcomm.c | 10 ---
src/backend/port/dynloader/sco.c | 7 --
src/backend/port/dynloader/sco.h | 46 ----------
src/backend/port/dynloader/unixware.c | 7 --
src/backend/port/dynloader/unixware.h | 49 ----------
src/include/port/sco.h | 7 --
src/include/port/unixware.h | 11 ---
src/include/storage/s_lock.h | 23 -----
src/makefiles/Makefile.sco | 13 ---
src/makefiles/Makefile.unixware | 35 --------
src/port/getrusage.c | 1 -
src/template/sco | 1 -
src/template/unixware | 41 ---------
21 files changed, 6 insertions(+), 575 deletions(-)
delete mode 100644 src/backend/port/dynloader/sco.c
delete mode 100644 src/backend/port/dynloader/sco.h
delete mode 100644 src/backend/port/dynloader/unixware.c
delete mode 100644 src/backend/port/dynloader/unixware.h
delete mode 100644 src/include/port/sco.h
delete mode 100644 src/include/port/unixware.h
delete mode 100644 src/makefiles/Makefile.sco
delete mode 100644 src/makefiles/Makefile.unixware
delete mode 100644 src/template/sco
delete mode 100644 src/template/unixware
(limited to 'doc/src')
diff --git a/configure b/configure
index 1d94256a9e..f4f2f8b7ce 100755
--- a/configure
+++ b/configure
@@ -694,7 +694,6 @@ STRIP_SHARED_LIB
STRIP_STATIC_LIB
STRIP
RANLIB
-ld_R_works
with_gnu_ld
LD
LDFLAGS_SL
@@ -2867,9 +2866,7 @@ dragonfly*) template=netbsd ;;
mingw*) template=win32 ;;
netbsd*) template=netbsd ;;
openbsd*) template=openbsd ;;
- sco*) template=sco ;;
solaris*) template=solaris ;;
- sysv5*) template=unixware ;;
esac
if test x"$template" = x"" ; then
@@ -6382,40 +6379,6 @@ with_gnu_ld=$ac_cv_prog_gnu_ld
-case $host_os in sysv5*)
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ld -R works" >&5
-$as_echo_n "checking whether ld -R works... " >&6; }
-if ${pgac_cv_prog_ld_R+:} false; then :
- $as_echo_n "(cached) " >&6
-else
-
- pgac_save_LDFLAGS=$LDFLAGS; LDFLAGS="$LDFLAGS -Wl,-R/usr/lib"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_prog_ld_R=yes
-else
- pgac_cv_prog_ld_R=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
- LDFLAGS=$pgac_save_LDFLAGS
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_prog_ld_R" >&5
-$as_echo "$pgac_cv_prog_ld_R" >&6; }
- ld_R_works=$pgac_cv_prog_ld_R
-
-esac
if test -n "$ac_tool_prefix"; then
# Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
set dummy ${ac_tool_prefix}ranlib; ac_word=$2
diff --git a/configure.in b/configure.in
index 9ace625029..9f7611caeb 100644
--- a/configure.in
+++ b/configure.in
@@ -69,9 +69,7 @@ dragonfly*) template=netbsd ;;
mingw*) template=win32 ;;
netbsd*) template=netbsd ;;
openbsd*) template=openbsd ;;
- sco*) template=sco ;;
solaris*) template=solaris ;;
- sysv5*) template=unixware ;;
esac
if test x"$template" = x"" ; then
@@ -871,18 +869,6 @@ AC_ARG_VAR(LDFLAGS_SL, [extra linker flags for linking shared libraries only])
PGAC_PROG_LD
AC_SUBST(LD)
AC_SUBST(with_gnu_ld)
-case $host_os in sysv5*)
- AC_CACHE_CHECK([whether ld -R works], [pgac_cv_prog_ld_R],
- [
- pgac_save_LDFLAGS=$LDFLAGS; LDFLAGS="$LDFLAGS -Wl,-R/usr/lib"
- AC_LINK_IFELSE([AC_LANG_PROGRAM([], [])],
- [pgac_cv_prog_ld_R=yes],
- [pgac_cv_prog_ld_R=no])
- LDFLAGS=$pgac_save_LDFLAGS
- ])
- ld_R_works=$pgac_cv_prog_ld_R
- AC_SUBST(ld_R_works)
-esac
AC_PROG_RANLIB
PGAC_CHECK_STRIP
AC_CHECK_TOOL(AR, ar, ar)
diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 24b895f3c3..5df2f04dd6 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -326,11 +326,7 @@ check: postgres.sgml $(ALMOSTALLSGML) check-tabs
## Install
##
-install: install-html
-
-ifneq ($(PORTNAME), sco)
-install: install-man
-endif
+install: install-html install-man
installdirs:
$(MKDIR_P) '$(DESTDIR)$(htmldir)'/html $(addprefix '$(DESTDIR)$(mandir)'/man, 1 3 $(sqlmansectnum))
diff --git a/doc/src/sgml/dfunc.sgml b/doc/src/sgml/dfunc.sgml
index ba2684cc3c..6a4b7d6e97 100644
--- a/doc/src/sgml/dfunc.sgml
+++ b/doc/src/sgml/dfunc.sgml
@@ -200,32 +200,6 @@ cc -G -o foo.so foo.o
gcc -fpic -c foo.c
gcc -G -o foo.so foo.o
-
-
-
-
-
-
-
- UnixWare>
- UnixWare>shared library>>
-
-
-
-    The compiler flag to create PIC is -K PIC
-    with the SCO compiler and -fpic with GCC.  To link shared
-    libraries, the compiler option is -G with the SCO compiler
-    and -shared with GCC.
-
-cc -K PIC -c foo.c
-cc -G -o foo.so foo.o
-
- or
-
-gcc -fpic -c foo.c
-gcc -shared -o foo.so foo.o
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 5ee28fcf85..883e575946 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -2000,8 +2000,8 @@ kill `cat /usr/local/pgsql/data/postmaster.pid`
In general, PostgreSQL> can be expected to work on
these CPU architectures: x86, x86_64, IA64, PowerPC,
- PowerPC 64, S/390, S/390x, Sparc, Sparc 64, ARM, MIPS, MIPSEL, M68K,
- and PA-RISC. Code support exists for M32R and VAX, but these
+ PowerPC 64, S/390, S/390x, Sparc, Sparc 64, ARM, MIPS, MIPSEL,
+ and PA-RISC. Code support exists for M68K, M32R, and VAX, but these
architectures are not known to have been tested recently. It is often
possible to build on an unsupported CPU type by configuring with
--disable-spinlocks, but performance will be poor.
@@ -2010,11 +2010,11 @@ kill `cat /usr/local/pgsql/data/postmaster.pid`
PostgreSQL> can be expected to work on these operating
systems: Linux (all recent distributions), Windows (Win2000 SP4 and later),
- FreeBSD, OpenBSD, NetBSD, macOS, AIX, HP/UX, Solaris,
- and UnixWare. Other Unix-like systems may also work but are not currently
+ FreeBSD, OpenBSD, NetBSD, macOS, AIX, HP/UX, and Solaris.
+ Other Unix-like systems may also work but are not currently
being tested. In most cases, all CPU architectures supported by
a given operating system will work. Look in
- the below to see if
+ below to see if
there is information
specific to your operating system, particularly if using an older system.
@@ -2639,160 +2639,6 @@ PHSS_30849 s700_800 u2comp/be/plugin library Patch
-
- SCO OpenServer and SCO UnixWare
-
-
- SCO
- installation on
-
-
-
- UnixWare
- installation on
-
-
-
- PostgreSQL can be built on SCO UnixWare 7 and SCO OpenServer 5.
- On OpenServer, you can use either the OpenServer Development Kit
- or the Universal Development Kit. However, some tweaking may be
- needed, as described below.
-
-
-
- Skunkware
-
-
- You should locate your copy of the SCO Skunkware CD. The
- Skunkware CD is included with UnixWare 7 and current versions of
- OpenServer 5. Skunkware includes ready-to-install versions of
- many popular programs that are available on the Internet. For
- example, gzip, gunzip, GNU Make, Flex, and Bison are all
- included. For UnixWare 7.1, this CD is now labeled "Open License
- Software Supplement". If you do not have this CD, the software
- on it is available
- from .
-
-
-
- Skunkware has different versions for UnixWare and OpenServer.
- Make sure you install the correct version for your operating
- system, except as noted below.
-
-
-
- On UnixWare 7.1.3 and beyond, the GCC compiler is included on the
- UDK CD as is GNU Make.
-
-
-
-
- GNU Make
-
-
- You need to use the GNU Make program, which is on the Skunkware
- CD. By default, it installs
- as /usr/local/bin/make.
-
-
-
- As of UnixWare 7.1.3 and above, the GNU Make program is the
- OSTK portion of the UDK CD, and is
- in /usr/gnu/bin/gmake.
-
-
-
-
- Readline
-
-
- The Readline library is on the Skunkware CD. But it is not
- included on the UnixWare 7.1 Skunkware CD. If you have the
- UnixWare 7.0.0 or 7.0.1 Skunkware CDs, you can install it from
- there. Otherwise,
- try .
-
-
-
- By default, Readline installs into /usr/local/lib> and
- /usr/local/include>. However, the
- PostgreSQL configure program will not find it
- there without help. If you installed Readline, then use the
- following options to configure:
-
-./configure --with-libraries=/usr/local/lib --with-includes=/usr/local/include
-
-
-
-
-
- Using the UDK on OpenServer
-
-
- If you are using the new Universal Development Kit (UDK) compiler
- on OpenServer, you need to specify the locations of the UDK
- libraries:
-
-./configure --with-libraries=/udk/usr/lib --with-includes=/udk/usr/include
-
- Putting these together with the Readline options from above:
-
-./configure --with-libraries="/udk/usr/lib /usr/local/lib" --with-includes="/udk/usr/include /usr/local/include"
-
-
-
-
-
- Reading the PostgreSQL Man Pages
-
-
- By default, the PostgreSQL man pages are installed into
- /usr/local/pgsql/share/man. By default, UnixWare
- does not look there for man pages. To be able to read them you
- need to modify the
- MANPATH variable
- in /etc/default/man, for example:
-
-MANPATH=/usr/lib/scohelp/%L/man:/usr/dt/man:/usr/man:/usr/share/man:scohelp:/usr/local/man:/usr/local/pgsql/share/man
-
-
-
-
- On OpenServer, some extra research needs to be invested to make
- the man pages usable, because the man system is a bit different
- from other platforms. Currently, PostgreSQL will not install
- them at all.
-
-
-
-
- C99 Issues with the 7.1.1b Feature Supplement
-
-
- For compilers earlier than the one released with OpenUNIX 8.0.0
- (UnixWare 7.1.2), including the 7.1.1b Feature Supplement, you
-    may need to specify -Xb in CFLAGS or the CC
- environment variable. The indication of this is an error in
- compiling tuplesort.c referencing inline
- functions. Apparently there was a change in the 7.1.2(8.0.0)
- compiler and beyond.
-
-
-
-
- Threading on UnixWare
-
-
-    For threading, you must use -Kpthread on all libpq-using
-    programs.  libpq uses pthread_* calls, which are only
-    available with the -Kpthread/-Kthread flag.
-
-
-
-
Solaris
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index a8efb3d006..6ae62b4d2b 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1086,34 +1086,6 @@ sysctl -w kern.sysv.shmall
-
- SCO OpenServer>
- SCO OpenServer>IPC configuration>>
-
-
-
- In the default configuration, only 512 kB of shared memory per
- segment is allowed. To increase the setting, first change to the
- directory /etc/conf/cf.d>. To display the current value of
- SHMMAX>, run:
-
-./configure -y SHMMAX
-
- To set a new value for SHMMAX>, run:
-
-./configure SHMMAX=value>
-
- where value> is the new value you want to use
- (in bytes). After setting SHMMAX>, rebuild the kernel:
-
-./link_unix
-
- and reboot.
-
-
-
-
-
Solaris> 2.6 to 2.9 (Solaris
6 to Solaris 9)
@@ -1189,36 +1161,6 @@ project.max-msg-ids=(priv,4096,deny)
-
-
- UnixWare>
- UnixWare>IPC configuration>>
-
-
-
- On UnixWare> 7, the maximum size for shared
- memory segments is 512 kB in the default configuration.
- To display the current value of SHMMAX>, run:
-
-/etc/conf/bin/idtune -g SHMMAX
-
- which displays the current, default, minimum, and maximum
- values. To set a new value for SHMMAX>,
- run:
-
-/etc/conf/bin/idtune SHMMAX value>
-
- where value> is the new value you want to use
- (in bytes). After setting SHMMAX>, rebuild the
- kernel:
-
-/etc/conf/bin/idbuild -B
-
- and reboot.
-
-
-
-
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index e1e2c0adaf..ea61eb518f 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -265,7 +265,6 @@ UUID_LIBS = @UUID_LIBS@
UUID_EXTRA_OBJS = @UUID_EXTRA_OBJS@
LD = @LD@
with_gnu_ld = @with_gnu_ld@
-ld_R_works = @ld_R_works@
# We want -L for libpgport.a and libpgcommon.a to be first in LDFLAGS. We
# also need LDFLAGS to be a "recursively expanded" variable, else adjustments
diff --git a/src/Makefile.shlib b/src/Makefile.shlib
index 87c80c5d01..358d90837c 100644
--- a/src/Makefile.shlib
+++ b/src/Makefile.shlib
@@ -236,30 +236,6 @@ ifeq ($(PORTNAME), solaris)
endif
endif
-ifeq ($(PORTNAME), sco)
- ifeq ($(GCC), yes)
- LINK.shared = $(CC) -shared
- else
- LINK.shared = $(CC) -G
- endif
- LINK.shared += -Wl,-z,text
- ifdef soname
- LINK.shared += -Wl,-h,$(soname)
- endif
-endif
-
-ifeq ($(PORTNAME), unixware)
- ifeq ($(GCC), yes)
- LINK.shared = $(CC) -shared
- else
- LINK.shared = $(CC) -G
- endif
- LINK.shared += -Wl,-z,text
- ifdef soname
- LINK.shared += -Wl,-h,$(soname)
- endif
-endif
-
ifeq ($(PORTNAME), cygwin)
LINK.shared = $(CC) -shared
ifdef SO_MAJOR_VERSION
diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c
index bae96bf18f..affa9bb7ab 100644
--- a/src/backend/libpq/pqcomm.c
+++ b/src/backend/libpq/pqcomm.c
@@ -683,16 +683,6 @@ StreamConnection(pgsocket server_fd, Port *port)
return STATUS_ERROR;
}
-#ifdef SCO_ACCEPT_BUG
-
- /*
- * UnixWare 7+ and OpenServer 5.0.4 are known to have this bug, but it
- * shouldn't hurt to catch it for all versions of those platforms.
- */
- if (port->raddr.addr.ss_family == 0)
- port->raddr.addr.ss_family = AF_UNIX;
-#endif
-
/* fill in the server (local) address */
port->laddr.salen = sizeof(port->laddr.addr);
if (getsockname(port->sock,
diff --git a/src/backend/port/dynloader/sco.c b/src/backend/port/dynloader/sco.c
deleted file mode 100644
index 1e24f494ac..0000000000
--- a/src/backend/port/dynloader/sco.c
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * src/backend/port/dynloader/sco.c
- *
- * Dummy file used for nothing at this point
- *
- * see sco.h
- */
diff --git a/src/backend/port/dynloader/sco.h b/src/backend/port/dynloader/sco.h
deleted file mode 100644
index 86f2383729..0000000000
--- a/src/backend/port/dynloader/sco.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * sco.h
- * port-specific prototypes for SCO 3.2v5.2
- *
- *
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/backend/port/dynloader/sco.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef PORT_PROTOS_H
-#define PORT_PROTOS_H
-
-#include <dlfcn.h>
-#include "utils/dynamic_loader.h" /* pgrminclude ignore */
-
-/*
- * Dynamic Loader on SCO 3.2v5.0.2
- *
- * this dynamic loader uses the system dynamic loading interface for shared
- * libraries (ie. dlopen/dlsym/dlclose). The user must specify a shared
- * library as the file to be dynamically loaded.
- */
-
-/*
- * In some older systems, the RTLD_NOW flag isn't defined and the mode
- * argument to dlopen must always be 1. The RTLD_GLOBAL flag is wanted
- * if available, but it doesn't exist everywhere.
- * If it doesn't exist, set it to 0 so it has no effect.
- */
-#ifndef RTLD_NOW
-#define RTLD_NOW 1
-#endif
-#ifndef RTLD_GLOBAL
-#define RTLD_GLOBAL 0
-#endif
-
-#define pg_dlopen(f) dlopen((f), RTLD_NOW | RTLD_GLOBAL)
-#define pg_dlsym dlsym
-#define pg_dlclose dlclose
-#define pg_dlerror dlerror
-
-#endif /* PORT_PROTOS_H */
diff --git a/src/backend/port/dynloader/unixware.c b/src/backend/port/dynloader/unixware.c
deleted file mode 100644
index afb36dfe99..0000000000
--- a/src/backend/port/dynloader/unixware.c
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * src/backend/port/dynloader/unixware.c
- *
- * Dummy file used for nothing at this point
- *
- * see unixware.h
- */
diff --git a/src/backend/port/dynloader/unixware.h b/src/backend/port/dynloader/unixware.h
deleted file mode 100644
index 130a9a25d5..0000000000
--- a/src/backend/port/dynloader/unixware.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * src/backend/port/dynloader/unixware.h
- *
- *-------------------------------------------------------------------------
- *
- * unixware.h
- * port-specific prototypes for Intel x86/UNIXWARE 7
- *
- *
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * unixware.h,v 1.2 1995/03/17 06:40:18 andrew Exp
- *
- *-------------------------------------------------------------------------
- */
-#ifndef PORT_PROTOS_H
-#define PORT_PROTOS_H
-
-#include <dlfcn.h>
-#include "utils/dynamic_loader.h" /* pgrminclude ignore */
-
-/*
- * Dynamic Loader on UnixWare.
- *
- * this dynamic loader uses the system dynamic loading interface for shared
- * libraries (ie. dlopen/dlsym/dlclose). The user must specify a shared
- * library as the file to be dynamically loaded.
- */
-
-/*
- * In some older systems, the RTLD_NOW flag isn't defined and the mode
- * argument to dlopen must always be 1. The RTLD_GLOBAL flag is wanted
- * if available, but it doesn't exist everywhere.
- * If it doesn't exist, set it to 0 so it has no effect.
- */
-#ifndef RTLD_NOW
-#define RTLD_NOW 1
-#endif
-#ifndef RTLD_GLOBAL
-#define RTLD_GLOBAL 0
-#endif
-
-#define pg_dlopen(f) dlopen((f), RTLD_NOW | RTLD_GLOBAL)
-#define pg_dlsym dlsym
-#define pg_dlclose dlclose
-#define pg_dlerror dlerror
-
-#endif /* PORT_PROTOS_H */
diff --git a/src/include/port/sco.h b/src/include/port/sco.h
deleted file mode 100644
index 30811450c9..0000000000
--- a/src/include/port/sco.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * src/include/port/sco.h
- *
- * see src/backend/libpq/pqcomm.c */
-#define SCO_ACCEPT_BUG
-
-#define USE_UNIVEL_CC
diff --git a/src/include/port/unixware.h b/src/include/port/unixware.h
deleted file mode 100644
index e068820957..0000000000
--- a/src/include/port/unixware.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/*
- * src/include/port/unixware.h
- *
- * see src/backend/libpq/pqcomm.c */
-#define SCO_ACCEPT_BUG
-
-/***************************************
- * Define this if you are compiling with
- * the native UNIXWARE C compiler.
- ***************************************/
-#define USE_UNIVEL_CC
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 7aad2de43d..3fe29cede6 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -706,29 +706,6 @@ typedef unsigned char slock_t;
#if !defined(HAS_TEST_AND_SET) /* We didn't trigger above, let's try here */
-#if defined(USE_UNIVEL_CC) /* Unixware compiler */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-asm int
-tas(volatile slock_t *s_lock)
-{
-/* UNIVEL wants %mem in column 1, so we don't pgindent this file */
-%mem s_lock
- pushl %ebx
- movl s_lock, %ebx
- movl $255, %eax
- lock
- xchgb %al, (%ebx)
- popl %ebx
-}
-
-#endif /* defined(USE_UNIVEL_CC) */
-
-
#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */
/*
* HP's PA-RISC
diff --git a/src/makefiles/Makefile.sco b/src/makefiles/Makefile.sco
deleted file mode 100644
index 993861570a..0000000000
--- a/src/makefiles/Makefile.sco
+++ /dev/null
@@ -1,13 +0,0 @@
-AROPT = cr
-export_dynamic = -Wl,-Bexport
-
-DLSUFFIX = .so
-ifeq ($(GCC), yes)
-CFLAGS_SL = -fpic
-else
-CFLAGS_SL = -K PIC
-endif
-
-# Rule for building a shared library from a single .o file
-%.so: %.o
- $(LD) -G -Bdynamic -o $@ $<
diff --git a/src/makefiles/Makefile.unixware b/src/makefiles/Makefile.unixware
deleted file mode 100644
index a52717b268..0000000000
--- a/src/makefiles/Makefile.unixware
+++ /dev/null
@@ -1,35 +0,0 @@
-AROPT = crs
-ifeq ($(with_gnu_ld), yes)
- export_dynamic = -Wl,-E
-else
- export_dynamic = -Wl,-Bexport
-endif
-
-ifeq ($(ld_R_works), yes)
-ifeq ($(with_gnu_ld), yes)
- rpath = -Wl,-rpath,'$(rpathdir)'
-else
- rpath = -Wl,-R'$(rpathdir)'
-endif
-endif
-
-# Unixware needs threads for everything that uses libpq
-CFLAGS += $(PTHREAD_CFLAGS)
-
-DLSUFFIX = .so
-ifeq ($(GCC), yes)
-CFLAGS_SL = -fpic
-else
-CFLAGS_SL = -K PIC
-endif
-ifeq ($(GCC), yes)
-SO_FLAGS = -shared
-else
-SO_FLAGS = -G
-endif
-
-# Rule for building a shared library from a single .o file
-%.so: %.o
- $(CC) $(CFLAGS) $(LDFLAGS) $(LDFLAGS_SL) $(SO_FLAGS) -o $@ $<
-
-sqlmansect = 5sql
diff --git a/src/port/getrusage.c b/src/port/getrusage.c
index a6f1ef2681..d24af92339 100644
--- a/src/port/getrusage.c
+++ b/src/port/getrusage.c
@@ -18,7 +18,6 @@
#include "rusagestub.h"
/* This code works on:
- * sco
* solaris_i386
* solaris_sparc
* hpux 9.*
diff --git a/src/template/sco b/src/template/sco
deleted file mode 100644
index 9a736da8be..0000000000
--- a/src/template/sco
+++ /dev/null
@@ -1 +0,0 @@
-CC="$CC -b elf"
diff --git a/src/template/unixware b/src/template/unixware
deleted file mode 100644
index d08fca1e6b..0000000000
--- a/src/template/unixware
+++ /dev/null
@@ -1,41 +0,0 @@
-if test "$GCC" != yes; then
- # The -Kno_host is for a bug in the compiler. See -hackers
- # discussion on 7-8/Aug/2003.
- cat >conftest.c <<__EOF__
-extern char *strcpy(char *, const char *);
-
-static void f(char *p, int n){
- strcpy(p+n,"");
-}
-void g(void){
- f(0, 0);
-}
-__EOF__
-
- # Debugging and optimization are mutually exclusive
- if test "$enable_debug" != yes; then
- CFLAGS="-O"
- fi
- if $CC -c -O -Kinline conftest.c >conftest.err 2>&1; then
- CFLAGS="$CFLAGS -Kinline"
- else
- CFLAGS="$CFLAGS -Kinline,no_host"
- fi
- rm -f conftest.*
-
- PTHREAD_CFLAGS="-Kpthread"
-
-# The effect of doing threading for the backend does not work
-# because of a threading bug that appears in the regression tests:
-#
-# in make check, the plpgsql test (plpgsql.sql)
-# set statement_timeout to 1000;
-# select blockme();
-# reset statement_timeout;
-#
-# per report from Olivier PRENANT
-
-fi
-
-# Unixware's ldap library reportedly needs these too
-EXTRA_LDAP_LIBS="-llber -lresolv"
--
cgit v1.2.3
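
[Editor's note] For orientation: the inline-assembly tas() removed from s_lock.h above was the classic x86 xchgb test-and-set. A rough modern C equivalent using a GCC builtin is sketched below; it is not part of this patch or of any remaining port.

    /* Sketch of the removed test-and-set semantics; not from the tree. */
    typedef unsigned char slock_t;

    static int
    tas(volatile slock_t *lock)
    {
        /* Atomically store a nonzero value and return the old one;
         * a nonzero result means the lock was already held. */
        return __sync_lock_test_and_set(lock, 1) != 0;
    }
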
From 64f3524e2c8deebc02808aa5ebdfa17859473add Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 12 Oct 2016 12:19:56 -0400
Subject: Remove pg_dump/pg_dumpall support for dumping from pre-8.0 servers.
The need for dumping from such ancient servers has decreased to about nil
in the field, so let's remove all the code that catered to it. Aside
from removing a lot of boilerplate variant queries, this allows us to not
have to cope with servers that don't have (a) schemas or (b) pg_depend.
That means we can get rid of assorted squishy code around that. There
may be some nonobvious additional simplifications possible, but this patch
already removes about 1500 lines of code.
I did not remove the ability for pg_restore to read custom-format archives
generated by these old versions (and light testing says that that does
still work). If you have an old server, you probably also have a pg_dump
that will work with it; but if you have an old custom-format backup file,
that might be all you have.
It'd be possible at this point to remove fmtQualifiedId()'s version
argument, but I refrained since that would affect code outside pg_dump.
Discussion: <2661.1475849167@sss.pgh.pa.us>
---
doc/src/sgml/ref/pg_dump.sgml | 7 +-
src/bin/pg_dump/dumputils.c | 50 +-
src/bin/pg_dump/pg_backup_archiver.c | 2 +-
src/bin/pg_dump/pg_dump.c | 1761 ++++------------------------------
src/bin/pg_dump/pg_dump.h | 1 -
src/bin/pg_dump/pg_dump_sort.c | 96 +-
src/bin/pg_dump/pg_dumpall.c | 110 +--
7 files changed, 250 insertions(+), 1777 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index be1b684082..371a61427d 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -758,10 +758,9 @@ PostgreSQL documentation
the dump. Instead fail if unable to lock a table within the specified
timeout>. The timeout may be
specified in any of the formats accepted by SET
- statement_timeout>. (Allowed values vary depending on the server
+ statement_timeout>. (Allowed formats vary depending on the server
version you are dumping from, but an integer number of milliseconds
- is accepted by all versions since 7.3. This option is ignored when
- dumping from a pre-7.3 server.)
+ is accepted by all versions.)
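For concreteness, the two timeout spellings the retained wording alludes to,
as an illustrative SQL sketch (the unit-suffixed form is only accepted by
servers new enough to parse GUC unit suffixes):

    -- A bare integer is taken as milliseconds on all supported versions:
    SET statement_timeout = 5000;    -- 5000 ms
    -- Newer servers also accept a unit-suffixed string for the same timeout:
    SET statement_timeout = '5s';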
@@ -1172,7 +1171,7 @@ CREATE DATABASE foo WITH TEMPLATE template0;
PostgreSQL> server versions newer than
pg_dump>'s version. pg_dump> can also
dump from PostgreSQL> servers older than its own version.
- (Currently, servers back to version 7.0 are supported.)
+ (Currently, servers back to version 8.0 are supported.)
However, pg_dump> cannot dump from
PostgreSQL> servers newer than its own major version;
it will refuse to even try, rather than risk making an invalid dump.
diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c
index cd1e8c4a68..0d5166891e 100644
--- a/src/bin/pg_dump/dumputils.c
+++ b/src/bin/pg_dump/dumputils.c
@@ -18,8 +18,6 @@
#include "fe_utils/string_utils.h"
-#define supports_grant_options(version) ((version) >= 70400)
-
static bool parseAclItem(const char *item, const char *type,
const char *name, const char *subname, int remoteVersion,
PQExpBuffer grantee, PQExpBuffer grantor,
@@ -246,11 +244,9 @@ buildACLCommands(const char *name, const char *subname,
/*
* For the owner, the default privilege level is ALL WITH
- * GRANT OPTION (only ALL prior to 7.4).
+ * GRANT OPTION.
*/
- if (supports_grant_options(remoteVersion)
- ? strcmp(privswgo->data, "ALL") != 0
- : strcmp(privs->data, "ALL") != 0)
+ if (strcmp(privswgo->data, "ALL") != 0)
{
appendPQExpBuffer(firstsql, "%sREVOKE ALL", prefix);
if (subname)
@@ -403,16 +399,19 @@ buildDefaultACLCommands(const char *type, const char *nspname,
* username=privilegecodes/grantor
* or
* group groupname=privilegecodes/grantor
- * (the /grantor part will not be present if pre-7.4 database).
+ * (the "group" case occurs only with servers before 8.1).
+ *
+ * Returns true on success, false on parse error. On success, the components
+ * of the string are returned in the PQExpBuffer parameters.
*
* The returned grantee string will be the dequoted username or groupname
- * (preceded with "group " in the latter case). The returned grantor is
- * the dequoted grantor name or empty. Privilege characters are decoded
- * and split between privileges with grant option (privswgo) and without
- * (privs).
+ * (preceded with "group " in the latter case). Note that a grant to PUBLIC
+ * is represented by an empty grantee string. The returned grantor is the
+ * dequoted grantor name. Privilege characters are decoded and split between
+ * privileges with grant option (privswgo) and without (privs).
*
- * Note: for cross-version compatibility, it's important to use ALL when
- * appropriate.
+ * Note: for cross-version compatibility, it's important to use ALL to
+ * represent the privilege sets whenever appropriate.
*/
static bool
parseAclItem(const char *item, const char *type,
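For readers unfamiliar with the aclitem text layout the comment above
describes, here is an illustrative sketch (the table and role names are
hypothetical; the privilege letters match the CONVERT_PRIV calls below):

    -- Hypothetical objects, shown only to illustrate the aclitem format.
    CREATE TABLE t (x int);
    GRANT SELECT ON t TO PUBLIC;
    SELECT relacl FROM pg_class WHERE relname = 't';
    -- e.g. {alice=arwdDxt/alice,=r/alice}
    -- "alice=arwdDxt/alice": the owner holds all table privileges,
    --   granted by herself (a=INSERT, r=SELECT, w=UPDATE, d=DELETE,
    --   D=TRUNCATE, x=REFERENCES, t=TRIGGER)
    -- "=r/alice": an empty grantee denotes PUBLIC, holding SELECT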
@@ -439,7 +438,7 @@ parseAclItem(const char *item, const char *type,
return false;
}
- /* grantor may be listed after / */
+ /* grantor should appear after / */
slpos = strchr(eqpos + 1, '/');
if (slpos)
{
@@ -452,7 +451,10 @@ parseAclItem(const char *item, const char *type,
}
}
else
- resetPQExpBuffer(grantor);
+ {
+ free(buf);
+ return false;
+ }
/* privilege codes */
#define CONVERT_PRIV(code, keywd) \
@@ -490,29 +492,19 @@ do { \
{
/* table only */
CONVERT_PRIV('a', "INSERT");
- if (remoteVersion >= 70200)
- CONVERT_PRIV('x', "REFERENCES");
+ CONVERT_PRIV('x', "REFERENCES");
/* rest are not applicable to columns */
if (subname == NULL)
{
- if (remoteVersion >= 70200)
- {
- CONVERT_PRIV('d', "DELETE");
- CONVERT_PRIV('t', "TRIGGER");
- }
+ CONVERT_PRIV('d', "DELETE");
+ CONVERT_PRIV('t', "TRIGGER");
if (remoteVersion >= 80400)
CONVERT_PRIV('D', "TRUNCATE");
}
}
/* UPDATE */
- if (remoteVersion >= 70200 ||
- strcmp(type, "SEQUENCE") == 0 ||
- strcmp(type, "SEQUENCES") == 0)
- CONVERT_PRIV('w', "UPDATE");
- else
- /* 7.0 and 7.1 have a simpler worldview */
- CONVERT_PRIV('w', "UPDATE,DELETE");
+ CONVERT_PRIV('w', "UPDATE");
}
else if (strcmp(type, "FUNCTION") == 0 ||
strcmp(type, "FUNCTIONS") == 0)
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index bba8b6ca9f..e237b4a9c9 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -388,7 +388,7 @@ RestoreArchive(Archive *AHX)
* target.
*/
AHX->minRemoteVersion = 0;
- AHX->maxRemoteVersion = 999999;
+ AHX->maxRemoteVersion = 9999999;
ConnectDatabase(AHX, ropt->dbname,
ropt->pghost, ropt->pgport, ropt->username,
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index fde7f59c3d..c821f3b0ee 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -96,9 +96,6 @@ bool g_verbose; /* User wants verbose narration of our
/* subquery used to convert user ID (eg, datdba) to user name */
static const char *username_subquery;
-/* obsolete as of 7.3: */
-static Oid g_last_builtin_oid; /* value of the last builtin oid */
-
/* The specified names/patterns should match at least one entity */
static int strict_names = 0;
@@ -142,7 +139,7 @@ static void expand_table_name_patterns(Archive *fout,
SimpleStringList *patterns,
SimpleOidList *oids,
bool strict_names);
-static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid, Oid objoid);
+static NamespaceInfo *findNamespace(Archive *fout, Oid nsoid);
static void dumpTableData(Archive *fout, TableDataInfo *tdinfo);
static void refreshMatViewData(Archive *fout, TableDataInfo *tdinfo);
static void guessConstraintInheritance(TableInfo *tblinfo, int numTables);
@@ -236,11 +233,8 @@ static char *convertRegProcReference(Archive *fout,
const char *proc);
static char *convertOperatorReference(Archive *fout, const char *opr);
static char *convertTSFunction(Archive *fout, Oid funcOid);
-static Oid findLastBuiltinOid_V71(Archive *fout, const char *);
-static Oid findLastBuiltinOid_V70(Archive *fout);
static void selectSourceSchema(Archive *fout, const char *schemaName);
static char *getFormattedTypeName(Archive *fout, Oid oid, OidOptions opts);
-static char *myFormatType(const char *typname, int32 typmod);
static void getBlobs(Archive *fout);
static void dumpBlob(Archive *fout, BlobInfo *binfo);
static int dumpBlobs(Archive *fout, void *arg);
@@ -633,10 +627,10 @@ main(int argc, char **argv)
fout->verbose = g_verbose;
/*
- * We allow the server to be back to 7.0, and up to any minor release of
+ * We allow the server to be back to 8.0, and up to any minor release of
* our own major version. (See also version check in pg_dumpall.c.)
*/
- fout->minRemoteVersion = 70000;
+ fout->minRemoteVersion = 80000;
fout->maxRemoteVersion = (PG_VERSION_NUM / 100) * 100 + 99;
fout->numWorkers = numWorkers;
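The version arithmetic here relies on the integer encoding also exposed as
server_version_num (major parts times 10000, plus the minor); a quick way to
see it, with illustrative outputs:

    SHOW server_version_num;
    -- 80000  for 8.0.0 (three-part scheme: 8.0 is the major version)
    -- 90603  for 9.6.3
    -- 100001 for 10.1  (two-part scheme: major 10, minor 1)

So (PG_VERSION_NUM / 100) * 100 + 99 reads as "any minor release of our own
major version", e.g. 100099 when built as major version 10.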
@@ -665,10 +659,8 @@ main(int argc, char **argv)
/* Select the appropriate subquery to convert user IDs to names */
if (fout->remoteVersion >= 80100)
username_subquery = "SELECT rolname FROM pg_catalog.pg_roles WHERE oid =";
- else if (fout->remoteVersion >= 70300)
- username_subquery = "SELECT usename FROM pg_catalog.pg_user WHERE usesysid =";
else
- username_subquery = "SELECT usename FROM pg_user WHERE usesysid =";
+ username_subquery = "SELECT usename FROM pg_catalog.pg_user WHERE usesysid =";
/* check the version for the synchronized snapshots feature */
if (numWorkers > 1 && fout->remoteVersion < 90200
@@ -683,18 +675,6 @@ main(int argc, char **argv)
exit_horribly(NULL,
"Exported snapshots are not supported by this server version.\n");
- /* Find the last built-in OID, if needed */
- if (fout->remoteVersion < 70300)
- {
- if (fout->remoteVersion >= 70100)
- g_last_builtin_oid = findLastBuiltinOid_V71(fout,
- PQdb(GetConnection(fout)));
- else
- g_last_builtin_oid = findLastBuiltinOid_V70(fout);
- if (g_verbose)
- write_msg(NULL, "last built-in OID is %u\n", g_last_builtin_oid);
- }
-
/* Expand schema selection patterns into OID lists */
if (schema_include_patterns.head != NULL)
{
@@ -774,16 +754,11 @@ main(int argc, char **argv)
/*
* Sort the objects into a safe dump order (no forward references).
*
- * In 7.3 or later, we can rely on dependency information to help us
- * determine a safe order, so the initial sort is mostly for cosmetic
- * purposes: we sort by name to ensure that logically identical schemas
- * will dump identically. Before 7.3 we don't have dependencies and we
- * use OID ordering as an (unreliable) guide to creation order.
+ * We rely on dependency information to help us determine a safe order, so
+ * the initial sort is mostly for cosmetic purposes: we sort by name to
+ * ensure that logically identical schemas will dump identically.
*/
- if (fout->remoteVersion >= 70300)
- sortDumpableObjectsByTypeName(dobjs, numObjs);
- else
- sortDumpableObjectsByTypeOid(dobjs, numObjs);
+ sortDumpableObjectsByTypeName(dobjs, numObjs);
/* If we do a parallel dump, we want the largest tables to go first */
if (archiveFormat == archDirectory && numWorkers > 1)
@@ -1000,12 +975,12 @@ setup_connection(Archive *AH, const char *dumpencoding,
ExecuteSqlStatement(AH, "SET INTERVALSTYLE = POSTGRES");
/*
- * If supported, set extra_float_digits so that we can dump float data
- * exactly (given correctly implemented float I/O code, anyway)
+ * Set extra_float_digits so that we can dump float data exactly (given
+ * correctly implemented float I/O code, anyway)
*/
if (AH->remoteVersion >= 90000)
ExecuteSqlStatement(AH, "SET extra_float_digits TO 3");
- else if (AH->remoteVersion >= 70400)
+ else
ExecuteSqlStatement(AH, "SET extra_float_digits TO 2");
/*
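To see what extra_float_digits buys the dump, a small illustrative session:

    -- With the default of 0, float8 output is rounded to 15 significant
    -- digits, which need not round-trip; raising the setting makes the
    -- printed value reproduce the stored bits exactly.
    SET extra_float_digits TO 3;
    SELECT 0.1::float8;   -- printed with enough digits to round-trip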
@@ -1018,8 +993,7 @@ setup_connection(Archive *AH, const char *dumpencoding,
/*
* Disable timeouts if supported.
*/
- if (AH->remoteVersion >= 70300)
- ExecuteSqlStatement(AH, "SET statement_timeout = 0");
+ ExecuteSqlStatement(AH, "SET statement_timeout = 0");
if (AH->remoteVersion >= 90300)
ExecuteSqlStatement(AH, "SET lock_timeout = 0");
if (AH->remoteVersion >= 90600)
@@ -1065,16 +1039,12 @@ setup_connection(Archive *AH, const char *dumpencoding,
"SET TRANSACTION ISOLATION LEVEL "
"REPEATABLE READ, READ ONLY");
}
- else if (AH->remoteVersion >= 70400)
+ else
{
- /* note: comma was not accepted in SET TRANSACTION before 8.0 */
ExecuteSqlStatement(AH,
"SET TRANSACTION ISOLATION LEVEL "
- "SERIALIZABLE READ ONLY");
+ "SERIALIZABLE, READ ONLY");
}
- else
- ExecuteSqlStatement(AH,
- "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE");
/*
* If user specified a snapshot to use, select that. In a parallel dump
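The syntax nuance behind the removed comment, sketched in SQL:

    BEGIN;
    -- Pre-8.0 grammar required the transaction modes without a comma:
    --   SET TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY;
    -- From 8.0 on (the new minimum), the comma-separated form is accepted:
    SET TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY;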
@@ -1190,9 +1160,6 @@ expand_schema_name_patterns(Archive *fout,
if (patterns->head == NULL)
return; /* nothing to do */
- if (fout->remoteVersion < 70300)
- exit_horribly(NULL, "server version must be at least 7.3 to use schema selection switches\n");
-
query = createPQExpBuffer();
/*
@@ -1661,15 +1628,12 @@ dumpTableData_copy(Archive *fout, void *dcontext)
selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
/*
- * If possible, specify the column list explicitly so that we have no
- * possibility of retrieving data in the wrong column order. (The default
- * column ordering of COPY will not be what we want in certain corner
- * cases involving ADD COLUMN and inheritance.)
+ * Specify the column list explicitly so that we have no possibility of
+ * retrieving data in the wrong column order. (The default column
+ * ordering of COPY will not be what we want in certain corner cases
+ * involving ADD COLUMN and inheritance.)
*/
- if (fout->remoteVersion >= 70300)
- column_list = fmtCopyColumnList(tbinfo, clistBuf);
- else
- column_list = ""; /* can't select columns in COPY */
+ column_list = fmtCopyColumnList(tbinfo, clistBuf);
if (oids && hasoids)
{
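An illustrative instance of the corner case the comment warns about
(hypothetical tables):

    -- Adding a parent column after the child was created places it after
    -- the child's own columns in the child's physical layout.
    CREATE TABLE parent (a int);
    CREATE TABLE child (c int) INHERITS (parent);
    ALTER TABLE parent ADD COLUMN b int;
    -- parent's order is (a, b) but child's physical order is (a, c, b),
    -- while a freshly recreated child would be (a, b, c); naming the
    -- columns pins the order regardless of physical layout:
    COPY child (a, b, c) TO STDOUT;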
@@ -1829,22 +1793,11 @@ dumpTableData_insert(Archive *fout, void *dcontext)
*/
selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
- if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(q, "DECLARE _pg_dump_cursor CURSOR FOR "
- "SELECT * FROM ONLY %s",
- fmtQualifiedId(fout->remoteVersion,
- tbinfo->dobj.namespace->dobj.name,
- classname));
- }
- else
- {
- appendPQExpBuffer(q, "DECLARE _pg_dump_cursor CURSOR FOR "
- "SELECT * FROM %s",
- fmtQualifiedId(fout->remoteVersion,
- tbinfo->dobj.namespace->dobj.name,
- classname));
- }
+ appendPQExpBuffer(q, "DECLARE _pg_dump_cursor CURSOR FOR "
+ "SELECT * FROM ONLY %s",
+ fmtQualifiedId(fout->remoteVersion,
+ tbinfo->dobj.namespace->dobj.name,
+ classname));
if (tdinfo->filtercond)
appendPQExpBuffer(q, " %s", tdinfo->filtercond);
@@ -2480,7 +2433,7 @@ dumpDatabase(Archive *fout)
username_subquery);
appendStringLiteralAH(dbQry, datname, fout);
}
- else if (fout->remoteVersion >= 80000)
+ else
{
appendPQExpBuffer(dbQry, "SELECT tableoid, oid, "
"(%s datdba) AS dba, "
@@ -2492,34 +2445,6 @@ dumpDatabase(Archive *fout)
username_subquery);
appendStringLiteralAH(dbQry, datname, fout);
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(dbQry, "SELECT tableoid, oid, "
- "(%s datdba) AS dba, "
- "pg_encoding_to_char(encoding) AS encoding, "
- "NULL AS datcollate, NULL AS datctype, "
- "0 AS datfrozenxid, 0 AS datminmxid, "
- "NULL AS tablespace "
- "FROM pg_database "
- "WHERE datname = ",
- username_subquery);
- appendStringLiteralAH(dbQry, datname, fout);
- }
- else
- {
- appendPQExpBuffer(dbQry, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_database') AS tableoid, "
- "oid, "
- "(%s datdba) AS dba, "
- "pg_encoding_to_char(encoding) AS encoding, "
- "NULL AS datcollate, NULL AS datctype, "
- "0 AS datfrozenxid, 0 AS datminmxid, "
- "NULL AS tablespace "
- "FROM pg_database "
- "WHERE datname = ",
- username_subquery);
- appendStringLiteralAH(dbQry, datname, fout);
- }
res = ExecuteSqlQueryForSingleRow(fout, dbQry->data);
@@ -2879,19 +2804,13 @@ getBlobs(Archive *fout)
"NULL AS initrlomacl "
" FROM pg_largeobject_metadata",
username_subquery);
- else if (fout->remoteVersion >= 70100)
+ else
appendPQExpBufferStr(blobQry,
"SELECT DISTINCT loid AS oid, "
"NULL::name AS rolname, NULL::oid AS lomacl, "
"NULL::oid AS rlomacl, NULL::oid AS initlomacl, "
"NULL::oid AS initrlomacl "
" FROM pg_largeobject");
- else
- appendPQExpBufferStr(blobQry,
- "SELECT oid, NULL AS rolname, NULL AS lomacl, "
- "NULL AS rlomacl, NULL AS initlomacl, "
- "NULL AS initrlomacl "
- " FROM pg_class WHERE relkind = 'l'");
res = ExecuteSqlQuery(fout, blobQry->data, PGRES_TUPLES_OK);
@@ -3031,10 +2950,8 @@ dumpBlobs(Archive *fout, void *arg)
*/
if (fout->remoteVersion >= 90000)
blobQry = "DECLARE bloboid CURSOR FOR SELECT oid FROM pg_largeobject_metadata";
- else if (fout->remoteVersion >= 70100)
- blobQry = "DECLARE bloboid CURSOR FOR SELECT DISTINCT loid FROM pg_largeobject";
else
- blobQry = "DECLARE bloboid CURSOR FOR SELECT oid FROM pg_class WHERE relkind = 'l'";
+ blobQry = "DECLARE bloboid CURSOR FOR SELECT DISTINCT loid FROM pg_largeobject";
ExecuteSqlStatement(fout, blobQry);
@@ -3536,45 +3453,6 @@ getNamespaces(Archive *fout, int *numNamespaces)
int i_initnspacl;
int i_initrnspacl;
- /*
- * Before 7.3, there are no real namespaces; create two dummy entries, one
- * for user stuff and one for system stuff.
- */
- if (fout->remoteVersion < 70300)
- {
- nsinfo = (NamespaceInfo *) pg_malloc(2 * sizeof(NamespaceInfo));
-
- nsinfo[0].dobj.objType = DO_NAMESPACE;
- nsinfo[0].dobj.catId.tableoid = 0;
- nsinfo[0].dobj.catId.oid = 0;
- AssignDumpId(&nsinfo[0].dobj);
- nsinfo[0].dobj.name = pg_strdup("public");
- nsinfo[0].rolname = pg_strdup("");
- nsinfo[0].nspacl = pg_strdup("");
- nsinfo[0].rnspacl = pg_strdup("");
- nsinfo[0].initnspacl = pg_strdup("");
- nsinfo[0].initrnspacl = pg_strdup("");
-
- selectDumpableNamespace(&nsinfo[0], fout);
-
- nsinfo[1].dobj.objType = DO_NAMESPACE;
- nsinfo[1].dobj.catId.tableoid = 0;
- nsinfo[1].dobj.catId.oid = 1;
- AssignDumpId(&nsinfo[1].dobj);
- nsinfo[1].dobj.name = pg_strdup("pg_catalog");
- nsinfo[1].rolname = pg_strdup("");
- nsinfo[1].nspacl = pg_strdup("");
- nsinfo[1].rnspacl = pg_strdup("");
- nsinfo[1].initnspacl = pg_strdup("");
- nsinfo[1].initrnspacl = pg_strdup("");
-
- selectDumpableNamespace(&nsinfo[1], fout);
-
- *numNamespaces = 2;
-
- return nsinfo;
- }
-
query = createPQExpBuffer();
/* Make sure we are in proper schema */
@@ -3684,37 +3562,16 @@ getNamespaces(Archive *fout, int *numNamespaces)
/*
* findNamespace:
- * given a namespace OID and an object OID, look up the info read by
- * getNamespaces
- *
- * NB: for pre-7.3 source database, we use object OID to guess whether it's
- * a system object or not. In 7.3 and later there is no guessing, and we
- * don't use objoid at all.
+ * given a namespace OID, look up the info read by getNamespaces
*/
static NamespaceInfo *
-findNamespace(Archive *fout, Oid nsoid, Oid objoid)
+findNamespace(Archive *fout, Oid nsoid)
{
NamespaceInfo *nsinfo;
- if (fout->remoteVersion >= 70300)
- {
- nsinfo = findNamespaceByOid(nsoid);
- }
- else
- {
- /* This code depends on the dummy objects set up by getNamespaces. */
- Oid i;
-
- if (objoid > g_last_builtin_oid)
- i = 0; /* user object */
- else
- i = 1; /* system object */
- nsinfo = findNamespaceByOid(i);
- }
-
+ nsinfo = findNamespaceByOid(nsoid);
if (nsinfo == NULL)
exit_horribly(NULL, "schema with OID %u does not exist\n", nsoid);
-
return nsinfo;
}
@@ -3932,7 +3789,7 @@ getTypes(Archive *fout, int *numTypes)
"FROM pg_type",
username_subquery);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBuffer(query, "SELECT tableoid, oid, typname, "
"typnamespace, NULL AS typacl, NULL as rtypacl, "
@@ -3947,38 +3804,6 @@ getTypes(Archive *fout, int *numTypes)
"FROM pg_type",
username_subquery);
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT tableoid, oid, typname, "
- "0::oid AS typnamespace, NULL AS typacl, NULL as rtypacl, "
- "NULL AS inittypacl, NULL AS initrtypacl, "
- "(%s typowner) AS rolname, "
- "typinput::oid AS typinput, "
- "typoutput::oid AS typoutput, typelem, typrelid, "
- "CASE WHEN typrelid = 0 THEN ' '::\"char\" "
- "ELSE (SELECT relkind FROM pg_class WHERE oid = typrelid) END AS typrelkind, "
- "typtype, typisdefined, "
- "typname[0] = '_' AND typelem != 0 AS isarray "
- "FROM pg_type",
- username_subquery);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_type') AS tableoid, "
- "oid, typname, "
- "0::oid AS typnamespace, NULL AS typacl, NULL as rtypacl, "
- "NULL AS inittypacl, NULL AS initrtypacl, "
- "(%s typowner) AS rolname, "
- "typinput::oid AS typinput, "
- "typoutput::oid AS typoutput, typelem, typrelid, "
- "CASE WHEN typrelid = 0 THEN ' '::\"char\" "
- "ELSE (SELECT relkind FROM pg_class WHERE oid = typrelid) END AS typrelkind, "
- "typtype, typisdefined, "
- "typname[0] = '_' AND typelem != 0 AS isarray "
- "FROM pg_type",
- username_subquery);
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -4013,8 +3838,7 @@ getTypes(Archive *fout, int *numTypes)
tyinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_typname));
tyinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_typnamespace)),
- tyinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_typnamespace)));
tyinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
tyinfo[i].typacl = pg_strdup(PQgetvalue(res, i, i_typacl));
tyinfo[i].rtypacl = pg_strdup(PQgetvalue(res, i, i_rtypacl));
@@ -4083,48 +3907,6 @@ getTypes(Archive *fout, int *numTypes)
* this is taken care of while sorting dependencies.
*/
stinfo->dobj.dump = DUMP_COMPONENT_NONE;
-
- /*
- * However, if dumping from pre-7.3, there will be no dependency
- * info so we have to fake it here. We only need to worry about
- * typinput and typoutput since the other functions only exist
- * post-7.3.
- */
- if (fout->remoteVersion < 70300)
- {
- Oid typinput;
- Oid typoutput;
- FuncInfo *funcInfo;
-
- typinput = atooid(PQgetvalue(res, i, i_typinput));
- typoutput = atooid(PQgetvalue(res, i, i_typoutput));
-
- funcInfo = findFuncByOid(typinput);
- if (funcInfo && funcInfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
- {
- /* base type depends on function */
- addObjectDependency(&tyinfo[i].dobj,
- funcInfo->dobj.dumpId);
- /* function depends on shell type */
- addObjectDependency(&funcInfo->dobj,
- stinfo->dobj.dumpId);
- /* mark shell type as to be dumped */
- stinfo->dobj.dump = DUMP_COMPONENT_ALL;
- }
-
- funcInfo = findFuncByOid(typoutput);
- if (funcInfo && funcInfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
- {
- /* base type depends on function */
- addObjectDependency(&tyinfo[i].dobj,
- funcInfo->dobj.dumpId);
- /* function depends on shell type */
- addObjectDependency(&funcInfo->dobj,
- stinfo->dobj.dumpId);
- /* mark shell type as to be dumped */
- stinfo->dobj.dump = DUMP_COMPONENT_ALL;
- }
- }
}
if (strlen(tyinfo[i].rolname) == 0)
@@ -4172,38 +3954,13 @@ getOperators(Archive *fout, int *numOprs)
/* Make sure we are in proper schema */
selectSourceSchema(fout, "pg_catalog");
- if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(query, "SELECT tableoid, oid, oprname, "
- "oprnamespace, "
- "(%s oprowner) AS rolname, "
- "oprkind, "
- "oprcode::oid AS oprcode "
- "FROM pg_operator",
- username_subquery);
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT tableoid, oid, oprname, "
- "0::oid AS oprnamespace, "
- "(%s oprowner) AS rolname, "
- "oprkind, "
- "oprcode::oid AS oprcode "
- "FROM pg_operator",
- username_subquery);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_operator') AS tableoid, "
- "oid, oprname, "
- "0::oid AS oprnamespace, "
- "(%s oprowner) AS rolname, "
- "oprkind, "
- "oprcode::oid AS oprcode "
- "FROM pg_operator",
- username_subquery);
- }
+ appendPQExpBuffer(query, "SELECT tableoid, oid, oprname, "
+ "oprnamespace, "
+ "(%s oprowner) AS rolname, "
+ "oprkind, "
+ "oprcode::oid AS oprcode "
+ "FROM pg_operator",
+ username_subquery);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -4229,8 +3986,7 @@ getOperators(Archive *fout, int *numOprs)
oprinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_oprname));
oprinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_oprnamespace)),
- oprinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_oprnamespace)));
oprinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
oprinfo[i].oprkind = (PQgetvalue(res, i, i_oprkind))[0];
oprinfo[i].oprcode = atooid(PQgetvalue(res, i, i_oprcode));
@@ -4319,8 +4075,7 @@ getCollations(Archive *fout, int *numCollations)
collinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_collname));
collinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_collnamespace)),
- collinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_collnamespace)));
collinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
/* Decide whether we want to dump it */
@@ -4358,13 +4113,6 @@ getConversions(Archive *fout, int *numConversions)
int i_connamespace;
int i_rolname;
- /* Conversions didn't exist pre-7.3 */
- if (fout->remoteVersion < 70300)
- {
- *numConversions = 0;
- return NULL;
- }
-
query = createPQExpBuffer();
/*
@@ -4403,8 +4151,7 @@ getConversions(Archive *fout, int *numConversions)
convinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_conname));
convinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_connamespace)),
- convinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_connamespace)));
convinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
/* Decide whether we want to dump it */
@@ -4527,30 +4274,11 @@ getOpclasses(Archive *fout, int *numOpclasses)
/* Make sure we are in proper schema */
selectSourceSchema(fout, "pg_catalog");
- if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(query, "SELECT tableoid, oid, opcname, "
- "opcnamespace, "
- "(%s opcowner) AS rolname "
- "FROM pg_opclass",
- username_subquery);
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBufferStr(query, "SELECT tableoid, oid, opcname, "
- "0::oid AS opcnamespace, "
- "''::name AS rolname "
- "FROM pg_opclass");
- }
- else
- {
- appendPQExpBufferStr(query, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_opclass') AS tableoid, "
- "oid, opcname, "
- "0::oid AS opcnamespace, "
- "''::name AS rolname "
- "FROM pg_opclass");
- }
+ appendPQExpBuffer(query, "SELECT tableoid, oid, opcname, "
+ "opcnamespace, "
+ "(%s opcowner) AS rolname "
+ "FROM pg_opclass",
+ username_subquery);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -4574,8 +4302,7 @@ getOpclasses(Archive *fout, int *numOpclasses)
opcinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opcname));
opcinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_opcnamespace)),
- opcinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_opcnamespace)));
opcinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
/* Decide whether we want to dump it */
@@ -4584,12 +4311,9 @@ getOpclasses(Archive *fout, int *numOpclasses)
/* Op Classes do not currently have ACLs. */
opcinfo[i].dobj.dump &= ~DUMP_COMPONENT_ACL;
- if (fout->remoteVersion >= 70300)
- {
- if (strlen(opcinfo[i].rolname) == 0)
- write_msg(NULL, "WARNING: owner of operator class \"%s\" appears to be invalid\n",
- opcinfo[i].dobj.name);
- }
+ if (strlen(opcinfo[i].rolname) == 0)
+ write_msg(NULL, "WARNING: owner of operator class \"%s\" appears to be invalid\n",
+ opcinfo[i].dobj.name);
}
PQclear(res);
@@ -4665,8 +4389,7 @@ getOpfamilies(Archive *fout, int *numOpfamilies)
opfinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_opfname));
opfinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_opfnamespace)),
- opfinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_opfnamespace)));
opfinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
/* Decide whether we want to dump it */
@@ -4675,12 +4398,9 @@ getOpfamilies(Archive *fout, int *numOpfamilies)
/* Extensions do not currently have ACLs. */
opfinfo[i].dobj.dump &= ~DUMP_COMPONENT_ACL;
- if (fout->remoteVersion >= 70300)
- {
- if (strlen(opfinfo[i].rolname) == 0)
- write_msg(NULL, "WARNING: owner of operator family \"%s\" appears to be invalid\n",
- opfinfo[i].dobj.name);
- }
+ if (strlen(opfinfo[i].rolname) == 0)
+ write_msg(NULL, "WARNING: owner of operator family \"%s\" appears to be invalid\n",
+ opfinfo[i].dobj.name);
}
PQclear(res);
@@ -4798,7 +4518,7 @@ getAggregates(Archive *fout, int *numAggs)
"deptype = 'e')");
appendPQExpBufferChar(query, ')');
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBuffer(query, "SELECT tableoid, oid, proname AS aggname, "
"pronamespace AS aggnamespace, "
@@ -4814,38 +4534,6 @@ getAggregates(Archive *fout, int *numAggs)
"(SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog')",
username_subquery);
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT tableoid, oid, aggname, "
- "0::oid AS aggnamespace, "
- "CASE WHEN aggbasetype = 0 THEN 0 ELSE 1 END AS pronargs, "
- "aggbasetype AS proargtypes, "
- "(%s aggowner) AS rolname, "
- "NULL AS aggacl, "
- "NULL AS raggacl, "
- "NULL AS initaggacl, NULL AS initraggacl "
- "FROM pg_aggregate "
- "where oid > '%u'::oid",
- username_subquery,
- g_last_builtin_oid);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_aggregate') AS tableoid, "
- "oid, aggname, "
- "0::oid AS aggnamespace, "
- "CASE WHEN aggbasetype = 0 THEN 0 ELSE 1 END AS pronargs, "
- "aggbasetype AS proargtypes, "
- "(%s aggowner) AS rolname, "
- "NULL AS aggacl, "
- "NULL AS raggacl, "
- "NULL AS initaggacl, NULL AS initraggacl "
- "FROM pg_aggregate "
- "where oid > '%u'::oid",
- username_subquery,
- g_last_builtin_oid);
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -4875,8 +4563,7 @@ getAggregates(Archive *fout, int *numAggs)
agginfo[i].aggfn.dobj.name = pg_strdup(PQgetvalue(res, i, i_aggname));
agginfo[i].aggfn.dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_aggnamespace)),
- agginfo[i].aggfn.dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_aggnamespace)));
agginfo[i].aggfn.rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
if (strlen(agginfo[i].aggfn.rolname) == 0)
write_msg(NULL, "WARNING: owner of aggregate function \"%s\" appears to be invalid\n",
@@ -4893,13 +4580,9 @@ getAggregates(Archive *fout, int *numAggs)
else
{
agginfo[i].aggfn.argtypes = (Oid *) pg_malloc(agginfo[i].aggfn.nargs * sizeof(Oid));
- if (fout->remoteVersion >= 70300)
- parseOidArray(PQgetvalue(res, i, i_proargtypes),
- agginfo[i].aggfn.argtypes,
- agginfo[i].aggfn.nargs);
- else
- /* it's just aggbasetype */
- agginfo[i].aggfn.argtypes[0] = atooid(PQgetvalue(res, i, i_proargtypes));
+ parseOidArray(PQgetvalue(res, i, i_proargtypes),
+ agginfo[i].aggfn.argtypes,
+ agginfo[i].aggfn.nargs);
}
/* Decide whether we want to dump it */
@@ -5025,7 +4708,7 @@ getFuncs(Archive *fout, int *numFuncs)
destroyPQExpBuffer(initacl_subquery);
destroyPQExpBuffer(initracl_subquery);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBuffer(query,
"SELECT tableoid, oid, proname, prolang, "
@@ -5056,39 +4739,6 @@ getFuncs(Archive *fout, int *numFuncs)
"deptype = 'e')");
appendPQExpBufferChar(query, ')');
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query,
- "SELECT tableoid, oid, proname, prolang, "
- "pronargs, proargtypes, prorettype, "
- "NULL AS proacl, "
- "NULL AS rproacl, "
- "NULL as initproacl, NULL AS initrproacl, "
- "0::oid AS pronamespace, "
- "(%s proowner) AS rolname "
- "FROM pg_proc "
- "WHERE pg_proc.oid > '%u'::oid",
- username_subquery,
- g_last_builtin_oid);
- }
- else
- {
- appendPQExpBuffer(query,
- "SELECT "
- "(SELECT oid FROM pg_class "
- " WHERE relname = 'pg_proc') AS tableoid, "
- "oid, proname, prolang, "
- "pronargs, proargtypes, prorettype, "
- "NULL AS proacl, "
- "NULL AS rproacl, "
- "NULL as initproacl, NULL AS initrproacl, "
- "0::oid AS pronamespace, "
- "(%s proowner) AS rolname "
- "FROM pg_proc "
- "where pg_proc.oid > '%u'::oid",
- username_subquery,
- g_last_builtin_oid);
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -5121,8 +4771,7 @@ getFuncs(Archive *fout, int *numFuncs)
finfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_proname));
finfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_pronamespace)),
- finfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_pronamespace)));
finfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
finfo[i].lang = atooid(PQgetvalue(res, i, i_prolang));
finfo[i].prorettype = atooid(PQgetvalue(res, i, i_prorettype));
@@ -5645,7 +5294,7 @@ getTables(Archive *fout, int *numTables)
RELKIND_RELATION, RELKIND_SEQUENCE,
RELKIND_VIEW, RELKIND_COMPOSITE_TYPE);
}
- else if (fout->remoteVersion >= 80000)
+ else
{
/*
* Left join to pick up dependency info linking sequences to their
@@ -5686,153 +5335,6 @@ getTables(Archive *fout, int *numTables)
RELKIND_RELATION, RELKIND_SEQUENCE,
RELKIND_VIEW, RELKIND_COMPOSITE_TYPE);
}
- else if (fout->remoteVersion >= 70300)
- {
- /*
- * Left join to pick up dependency info linking sequences to their
- * owning column, if any
- */
- appendPQExpBuffer(query,
- "SELECT c.tableoid, c.oid, relname, "
- "relacl, NULL as rrelacl, "
- "NULL AS initrelacl, NULL AS initrrelacl, "
- "relkind, relnamespace, "
- "(%s relowner) AS rolname, "
- "relchecks, (reltriggers <> 0) AS relhastriggers, "
- "relhasindex, relhasrules, relhasoids, "
- "'f'::bool AS relrowsecurity, "
- "'f'::bool AS relforcerowsecurity, "
- "0 AS relfrozenxid, 0 AS relminmxid,"
- "0 AS toid, "
- "0 AS tfrozenxid, 0 AS tminmxid,"
- "'p' AS relpersistence, 't' as relispopulated, "
- "'d' AS relreplident, relpages, "
- "NULL AS reloftype, "
- "d.refobjid AS owning_tab, "
- "d.refobjsubid AS owning_col, "
- "NULL AS reltablespace, "
- "NULL AS reloptions, "
- "NULL AS toast_reloptions, "
- "NULL AS changed_acl "
- "FROM pg_class c "
- "LEFT JOIN pg_depend d ON "
- "(c.relkind = '%c' AND "
- "d.classid = c.tableoid AND d.objid = c.oid AND "
- "d.objsubid = 0 AND "
- "d.refclassid = c.tableoid AND d.deptype = 'i') "
- "WHERE relkind IN ('%c', '%c', '%c', '%c') "
- "ORDER BY c.oid",
- username_subquery,
- RELKIND_SEQUENCE,
- RELKIND_RELATION, RELKIND_SEQUENCE,
- RELKIND_VIEW, RELKIND_COMPOSITE_TYPE);
- }
- else if (fout->remoteVersion >= 70200)
- {
- appendPQExpBuffer(query,
- "SELECT tableoid, oid, relname, relacl, "
- "NULL as rrelacl, "
- "NULL AS initrelacl, NULL AS initrrelacl, "
- "relkind, "
- "0::oid AS relnamespace, "
- "(%s relowner) AS rolname, "
- "relchecks, (reltriggers <> 0) AS relhastriggers, "
- "relhasindex, relhasrules, relhasoids, "
- "'f'::bool AS relrowsecurity, "
- "'f'::bool AS relforcerowsecurity, "
- "0 AS relfrozenxid, 0 AS relminmxid,"
- "0 AS toid, "
- "0 AS tfrozenxid, 0 AS tminmxid,"
- "'p' AS relpersistence, 't' as relispopulated, "
- "'d' AS relreplident, relpages, "
- "NULL AS reloftype, "
- "NULL::oid AS owning_tab, "
- "NULL::int4 AS owning_col, "
- "NULL AS reltablespace, "
- "NULL AS reloptions, "
- "NULL AS toast_reloptions, "
- "NULL AS changed_acl "
- "FROM pg_class "
- "WHERE relkind IN ('%c', '%c', '%c') "
- "ORDER BY oid",
- username_subquery,
- RELKIND_RELATION, RELKIND_SEQUENCE, RELKIND_VIEW);
- }
- else if (fout->remoteVersion >= 70100)
- {
- /* all tables have oids in 7.1 */
- appendPQExpBuffer(query,
- "SELECT tableoid, oid, relname, relacl, "
- "NULL as rrelacl, "
- "NULL AS initrelacl, NULL AS initrrelacl, "
- "relkind, "
- "0::oid AS relnamespace, "
- "(%s relowner) AS rolname, "
- "relchecks, (reltriggers <> 0) AS relhastriggers, "
- "relhasindex, relhasrules, "
- "'t'::bool AS relhasoids, "
- "'f'::bool AS relrowsecurity, "
- "'f'::bool AS relforcerowsecurity, "
- "0 AS relfrozenxid, 0 AS relminmxid,"
- "0 AS toid, "
- "0 AS tfrozenxid, 0 AS tminmxid,"
- "'p' AS relpersistence, 't' as relispopulated, "
- "'d' AS relreplident, relpages, "
- "NULL AS reloftype, "
- "NULL::oid AS owning_tab, "
- "NULL::int4 AS owning_col, "
- "NULL AS reltablespace, "
- "NULL AS reloptions, "
- "NULL AS toast_reloptions, "
- "NULL AS changed_acl "
- "FROM pg_class "
- "WHERE relkind IN ('%c', '%c', '%c') "
- "ORDER BY oid",
- username_subquery,
- RELKIND_RELATION, RELKIND_SEQUENCE, RELKIND_VIEW);
- }
- else
- {
- /*
- * Before 7.1, view relkind was not set to 'v', so we must check if we
- * have a view by looking for a rule in pg_rewrite.
- */
- appendPQExpBuffer(query,
- "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_class') AS tableoid, "
- "oid, relname, relacl, NULL as rrelacl, "
- "NULL AS initrelacl, NULL AS initrrelacl, "
- "CASE WHEN relhasrules and relkind = 'r' "
- " and EXISTS(SELECT rulename FROM pg_rewrite r WHERE "
- " r.ev_class = c.oid AND r.ev_type = '1') "
- "THEN '%c'::\"char\" "
- "ELSE relkind END AS relkind,"
- "0::oid AS relnamespace, "
- "(%s relowner) AS rolname, "
- "relchecks, (reltriggers <> 0) AS relhastriggers, "
- "relhasindex, relhasrules, "
- "'t'::bool AS relhasoids, "
- "'f'::bool AS relrowsecurity, "
- "'f'::bool AS relforcerowsecurity, "
- "0 AS relfrozenxid, 0 AS relminmxid,"
- "0 AS toid, "
- "0 AS tfrozenxid, 0 AS tminmxid,"
- "'p' AS relpersistence, 't' as relispopulated, "
- "'d' AS relreplident, 0 AS relpages, "
- "NULL AS reloftype, "
- "NULL::oid AS owning_tab, "
- "NULL::int4 AS owning_col, "
- "NULL AS reltablespace, "
- "NULL AS reloptions, "
- "NULL AS toast_reloptions, "
- "NULL AS changed_acl "
- "FROM pg_class c "
- "WHERE relkind IN ('%c', '%c') "
- "ORDER BY oid",
- RELKIND_VIEW,
- username_subquery,
- RELKIND_RELATION, RELKIND_SEQUENCE);
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -5886,7 +5388,7 @@ getTables(Archive *fout, int *numTables)
i_reloftype = PQfnumber(res, "reloftype");
i_changed_acl = PQfnumber(res, "changed_acl");
- if (dopt->lockWaitTimeout && fout->remoteVersion >= 70300)
+ if (dopt->lockWaitTimeout)
{
/*
* Arrange to fail instead of waiting forever for a table lock.
@@ -5910,8 +5412,7 @@ getTables(Archive *fout, int *numTables)
tblinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_relname));
tblinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_relnamespace)),
- tblinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_relnamespace)));
tblinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
tblinfo[i].relacl = pg_strdup(PQgetvalue(res, i, i_relacl));
tblinfo[i].rrelacl = pg_strdup(PQgetvalue(res, i, i_rrelacl));
@@ -6017,7 +5518,7 @@ getTables(Archive *fout, int *numTables)
tblinfo[i].dobj.name);
}
- if (dopt->lockWaitTimeout && fout->remoteVersion >= 70300)
+ if (dopt->lockWaitTimeout)
{
ExecuteSqlStatement(fout, "SET statement_timeout = 0");
}
@@ -6290,7 +5791,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
"ORDER BY indexname",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 80000)
+ else
{
appendPQExpBuffer(query,
"SELECT t.tableoid, t.oid, "
@@ -6319,87 +5820,6 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
"ORDER BY indexname",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(query,
- "SELECT t.tableoid, t.oid, "
- "t.relname AS indexname, "
- "pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, "
- "t.relnatts AS indnkeys, "
- "i.indkey, i.indisclustered, "
- "false AS indisreplident, t.relpages, "
- "c.contype, c.conname, "
- "c.condeferrable, c.condeferred, "
- "c.tableoid AS contableoid, "
- "c.oid AS conoid, "
- "null AS condef, "
- "NULL AS tablespace, "
- "null AS indreloptions "
- "FROM pg_catalog.pg_index i "
- "JOIN pg_catalog.pg_class t ON (t.oid = i.indexrelid) "
- "LEFT JOIN pg_catalog.pg_depend d "
- "ON (d.classid = t.tableoid "
- "AND d.objid = t.oid "
- "AND d.deptype = 'i') "
- "LEFT JOIN pg_catalog.pg_constraint c "
- "ON (d.refclassid = c.tableoid "
- "AND d.refobjid = c.oid) "
- "WHERE i.indrelid = '%u'::pg_catalog.oid "
- "ORDER BY indexname",
- tbinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query,
- "SELECT t.tableoid, t.oid, "
- "t.relname AS indexname, "
- "pg_get_indexdef(i.indexrelid) AS indexdef, "
- "t.relnatts AS indnkeys, "
- "i.indkey, false AS indisclustered, "
- "false AS indisreplident, t.relpages, "
- "CASE WHEN i.indisprimary THEN 'p'::char "
- "ELSE '0'::char END AS contype, "
- "t.relname AS conname, "
- "false AS condeferrable, "
- "false AS condeferred, "
- "0::oid AS contableoid, "
- "t.oid AS conoid, "
- "null AS condef, "
- "NULL AS tablespace, "
- "null AS indreloptions "
- "FROM pg_index i, pg_class t "
- "WHERE t.oid = i.indexrelid "
- "AND i.indrelid = '%u'::oid "
- "ORDER BY indexname",
- tbinfo->dobj.catId.oid);
- }
- else
- {
- appendPQExpBuffer(query,
- "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_class') AS tableoid, "
- "t.oid, "
- "t.relname AS indexname, "
- "pg_get_indexdef(i.indexrelid) AS indexdef, "
- "t.relnatts AS indnkeys, "
- "i.indkey, false AS indisclustered, "
- "false AS indisreplident, t.relpages, "
- "CASE WHEN i.indisprimary THEN 'p'::char "
- "ELSE '0'::char END AS contype, "
- "t.relname AS conname, "
- "false AS condeferrable, "
- "false AS condeferred, "
- "0::oid AS contableoid, "
- "t.oid AS conoid, "
- "null AS condef, "
- "NULL AS tablespace, "
- "null AS indreloptions "
- "FROM pg_index i, pg_class t "
- "WHERE t.oid = i.indexrelid "
- "AND i.indrelid = '%u'::oid "
- "ORDER BY indexname",
- tbinfo->dobj.catId.oid);
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -6442,19 +5862,9 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
indxinfo[j].indnkeys = atoi(PQgetvalue(res, j, i_indnkeys));
indxinfo[j].tablespace = pg_strdup(PQgetvalue(res, j, i_tablespace));
indxinfo[j].indreloptions = pg_strdup(PQgetvalue(res, j, i_indreloptions));
-
- /*
- * In pre-7.4 releases, indkeys may contain more entries than
- * indnkeys says (since indnkeys will be 1 for a functional
- * index). We don't actually care about this case since we don't
- * examine indkeys except for indexes associated with PRIMARY and
- * UNIQUE constraints, which are never functional indexes. But we
- * have to allocate enough space to keep parseOidArray from
- * complaining.
- */
- indxinfo[j].indkeys = (Oid *) pg_malloc(INDEX_MAX_KEYS * sizeof(Oid));
+ indxinfo[j].indkeys = (Oid *) pg_malloc(indxinfo[j].indnkeys * sizeof(Oid));
parseOidArray(PQgetvalue(res, j, i_indkey),
- indxinfo[j].indkeys, INDEX_MAX_KEYS);
+ indxinfo[j].indkeys, indxinfo[j].indnkeys);
indxinfo[j].indisclustered = (PQgetvalue(res, j, i_indisclustered)[0] == 't');
indxinfo[j].indisreplident = (PQgetvalue(res, j, i_indisreplident)[0] == 't');
indxinfo[j].relpages = atoi(PQgetvalue(res, j, i_relpages));
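For reference, indkey is an int2vector whose space-separated text form is
what parseOidArray consumes; an illustrative query (the table name is
hypothetical):

    SELECT c.relname AS indexname, i.indkey
    FROM pg_index i
    JOIN pg_class c ON c.oid = i.indexrelid
    WHERE i.indrelid = 'public.mytab'::regclass;
    -- e.g. indkey = '1 3' for an index on the table's first and third columns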
@@ -6465,9 +5875,6 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
/*
* If we found a constraint matching the index, create an
* entry for it.
- *
- * In a pre-7.3 database, we take this path iff the index was
- * marked indisprimary.
*/
constrinfo[j].dobj.objType = DO_CONSTRAINT;
constrinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_contableoid));
@@ -6490,10 +5897,6 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
constrinfo[j].separate = true;
indxinfo[j].indexconstraint = constrinfo[j].dobj.dumpId;
-
- /* If pre-7.3 DB, better make sure table comes first */
- addObjectDependency(&constrinfo[j].dobj,
- tbinfo->dobj.dumpId);
}
else
{
@@ -6532,10 +5935,6 @@ getConstraints(Archive *fout, TableInfo tblinfo[], int numTables)
i_condef;
int ntups;
- /* pg_constraint was created in 7.3, so nothing to do if older */
- if (fout->remoteVersion < 70300)
- return;
-
query = createPQExpBuffer();
for (i = 0; i < numTables; i++)
@@ -6621,10 +6020,6 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo)
i_consrc;
int ntups;
- /* pg_constraint was created in 7.3, so nothing to do if older */
- if (fout->remoteVersion < 70300)
- return;
-
/*
* select appropriate schema to ensure names in constraint are properly
* qualified
@@ -6642,17 +6037,9 @@ getDomainConstraints(Archive *fout, TypeInfo *tyinfo)
"ORDER BY conname",
tyinfo->dobj.catId.oid);
- else if (fout->remoteVersion >= 70400)
- appendPQExpBuffer(query, "SELECT tableoid, oid, conname, "
- "pg_catalog.pg_get_constraintdef(oid) AS consrc, "
- "true as convalidated "
- "FROM pg_catalog.pg_constraint "
- "WHERE contypid = '%u'::pg_catalog.oid "
- "ORDER BY conname",
- tyinfo->dobj.catId.oid);
else
appendPQExpBuffer(query, "SELECT tableoid, oid, conname, "
- "'CHECK (' || consrc || ')' AS consrc, "
+ "pg_catalog.pg_get_constraintdef(oid) AS consrc, "
"true as convalidated "
"FROM pg_catalog.pg_constraint "
"WHERE contypid = '%u'::pg_catalog.oid "
@@ -6745,20 +6132,10 @@ getRules(Archive *fout, int *numRules)
"FROM pg_rewrite "
"ORDER BY oid");
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBufferStr(query, "SELECT "
- "tableoid, oid, rulename, "
- "ev_class AS ruletable, ev_type, is_instead, "
- "'O'::char AS ev_enabled "
- "FROM pg_rewrite "
- "ORDER BY oid");
- }
else
{
appendPQExpBufferStr(query, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_rewrite') AS tableoid, "
- "oid, rulename, "
+ "tableoid, oid, rulename, "
"ev_class AS ruletable, ev_type, is_instead, "
"'O'::char AS ev_enabled "
"FROM pg_rewrite "
@@ -6931,7 +6308,7 @@ getTriggers(Archive *fout, TableInfo tblinfo[], int numTables)
"AND tgconstraint = 0",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
/*
* We ignore triggers that are tied to a foreign-key constraint,
@@ -6954,34 +6331,7 @@ getTriggers(Archive *fout, TableInfo tblinfo[], int numTables)
" WHERE d.classid = t.tableoid AND d.objid = t.oid AND d.deptype = 'i' AND c.contype = 'f'))",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query,
- "SELECT tgname, tgfoid::regproc AS tgfname, "
- "tgtype, tgnargs, tgargs, tgenabled, "
- "tgisconstraint, tgconstrname, tgdeferrable, "
- "tgconstrrelid, tginitdeferred, tableoid, oid, "
- "(SELECT relname FROM pg_class WHERE oid = tgconstrrelid) "
- " AS tgconstrrelname "
- "FROM pg_trigger "
- "WHERE tgrelid = '%u'::oid",
- tbinfo->dobj.catId.oid);
- }
- else
- {
- appendPQExpBuffer(query,
- "SELECT tgname, tgfoid::regproc AS tgfname, "
- "tgtype, tgnargs, tgargs, tgenabled, "
- "tgisconstraint, tgconstrname, tgdeferrable, "
- "tgconstrrelid, tginitdeferred, "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_trigger') AS tableoid, "
- "oid, "
- "(SELECT relname FROM pg_class WHERE oid = tgconstrrelid) "
- " AS tgconstrrelname "
- "FROM pg_trigger "
- "WHERE tgrelid = '%u'::oid",
- tbinfo->dobj.catId.oid);
- }
+
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
ntups = PQntuples(res);
@@ -7278,7 +6628,7 @@ getProcLangs(Archive *fout, int *numProcLangs)
"ORDER BY oid",
username_subquery);
}
- else if (fout->remoteVersion >= 70400)
+ else
{
/* Languages are owned by the bootstrap superuser, sysid 1 */
appendPQExpBuffer(query, "SELECT tableoid, oid, "
@@ -7292,45 +6642,6 @@ getProcLangs(Archive *fout, int *numProcLangs)
"ORDER BY oid",
username_subquery);
}
- else if (fout->remoteVersion >= 70300)
- {
- /* No clear notion of an owner at all before 7.4 ... */
- appendPQExpBuffer(query, "SELECT tableoid, oid, "
- "lanname, lanpltrusted, lanplcallfoid, "
- "0 AS laninline, lanvalidator, lanacl, "
- "NULL AS rlanacl, "
- "NULL AS initlanacl, NULL AS initrlanacl, "
- "NULL AS lanowner "
- "FROM pg_language "
- "WHERE lanispl "
- "ORDER BY oid");
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT tableoid, oid, "
- "lanname, lanpltrusted, lanplcallfoid, "
- "0 AS laninline, 0 AS lanvalidator, NULL AS lanacl, "
- "NULL AS rlanacl, "
- "NULL AS initlanacl, NULL AS initrlanacl, "
- "NULL AS lanowner "
- "FROM pg_language "
- "WHERE lanispl "
- "ORDER BY oid");
- }
- else
- {
- appendPQExpBuffer(query, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_language') AS tableoid, "
- "oid, "
- "lanname, lanpltrusted, lanplcallfoid, "
- "0 AS laninline, 0 AS lanvalidator, NULL AS lanacl, "
- "NULL AS rlanacl, "
- "NULL AS initlanacl, NULL AS initrlanacl, "
- "NULL AS lanowner "
- "FROM pg_language "
- "WHERE lanispl "
- "ORDER BY oid");
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -7379,20 +6690,6 @@ getProcLangs(Archive *fout, int *numProcLangs)
PQgetisnull(res, i, i_initlanacl) &&
PQgetisnull(res, i, i_initrlanacl))
planginfo[i].dobj.dump &= ~DUMP_COMPONENT_ACL;
-
- if (fout->remoteVersion < 70300)
- {
- /*
- * We need to make a dependency to ensure the function will be
- * dumped first. (In 7.3 and later the regular dependency
- * mechanism will handle this for us.)
- */
- FuncInfo *funcInfo = findFuncByOid(planginfo[i].lanplcallfoid);
-
- if (funcInfo)
- addObjectDependency(&planginfo[i].dobj,
- funcInfo->dobj.dumpId);
- }
}
PQclear(res);
@@ -7434,25 +6731,13 @@ getCasts(Archive *fout, int *numCasts)
"castmethod "
"FROM pg_cast ORDER BY 3,4");
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBufferStr(query, "SELECT tableoid, oid, "
"castsource, casttarget, castfunc, castcontext, "
"CASE WHEN castfunc = 0 THEN 'b' ELSE 'f' END AS castmethod "
"FROM pg_cast ORDER BY 3,4");
}
- else
- {
- appendPQExpBufferStr(query, "SELECT 0 AS tableoid, p.oid, "
- "t1.oid AS castsource, t2.oid AS casttarget, "
- "p.oid AS castfunc, 'e' AS castcontext, "
- "'f' AS castmethod "
- "FROM pg_type t1, pg_type t2, pg_proc p "
- "WHERE p.pronargs = 1 AND "
- "p.proargtypes[0] = t1.oid AND "
- "p.prorettype = t2.oid AND p.proname = t2.typname "
- "ORDER BY 3,4");
- }
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -7499,22 +6784,6 @@ getCasts(Archive *fout, int *numCasts)
sTypeInfo->dobj.name, tTypeInfo->dobj.name);
castinfo[i].dobj.name = namebuf.data;
- if (fout->remoteVersion < 70300 &&
- OidIsValid(castinfo[i].castfunc))
- {
- /*
- * We need to make a dependency to ensure the function will be
- * dumped first. (In 7.3 and later the regular dependency
- * mechanism handles this for us.)
- */
- FuncInfo *funcInfo;
-
- funcInfo = findFuncByOid(castinfo[i].castfunc);
- if (funcInfo)
- addObjectDependency(&castinfo[i].dobj,
- funcInfo->dobj.dumpId);
- }
-
/* Decide whether we want to dump it */
selectDumpableCast(&(castinfo[i]), fout);
@@ -7701,10 +6970,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
/*
* we must read the attribute names in attribute number order! because
- * we will use the attnum to index into the attnames array later. We
- * actually ask to order by "attrelid, attnum" because (at least up to
- * 7.3) the planner is not smart enough to realize it needn't re-sort
- * the output of an indexscan on pg_attribute_relid_attnum_index.
+ * we will use the attnum to index into the attnames array later.
*/
if (g_verbose)
write_msg(NULL, "finding the columns and types of table \"%s.%s\"\n",
@@ -7736,7 +7002,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"ON a.atttypid = t.oid "
"WHERE a.attrelid = '%u'::pg_catalog.oid "
"AND a.attnum > 0::pg_catalog.int2 "
- "ORDER BY a.attrelid, a.attnum",
+ "ORDER BY a.attnum",
tbinfo->dobj.catId.oid);
}
else if (fout->remoteVersion >= 90100)
@@ -7760,7 +7026,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"ON a.atttypid = t.oid "
"WHERE a.attrelid = '%u'::pg_catalog.oid "
"AND a.attnum > 0::pg_catalog.int2 "
- "ORDER BY a.attrelid, a.attnum",
+ "ORDER BY a.attnum",
tbinfo->dobj.catId.oid);
}
else if (fout->remoteVersion >= 90000)
@@ -7778,10 +7044,10 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"ON a.atttypid = t.oid "
"WHERE a.attrelid = '%u'::pg_catalog.oid "
"AND a.attnum > 0::pg_catalog.int2 "
- "ORDER BY a.attrelid, a.attnum",
+ "ORDER BY a.attnum",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
/* need left join here to not fail on dropped columns ... */
appendPQExpBuffer(q, "SELECT a.attnum, a.attname, a.atttypmod, "
@@ -7795,50 +7061,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"ON a.atttypid = t.oid "
"WHERE a.attrelid = '%u'::pg_catalog.oid "
"AND a.attnum > 0::pg_catalog.int2 "
- "ORDER BY a.attrelid, a.attnum",
- tbinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- /*
- * attstattarget doesn't exist in 7.1. It does exist in 7.2, but
- * we don't dump it because we can't tell whether it's been
- * explicitly set or was just a default.
- *
- * attislocal doesn't exist before 7.3, either; in older databases
- * we assume it's TRUE, else we'd fail to dump non-inherited atts.
- */
- appendPQExpBuffer(q, "SELECT a.attnum, a.attname, a.atttypmod, "
- "-1 AS attstattarget, a.attstorage, "
- "t.typstorage, a.attnotnull, a.atthasdef, "
- "false AS attisdropped, a.attlen, "
- "a.attalign, true AS attislocal, "
- "format_type(t.oid,a.atttypmod) AS atttypname, "
- "'' AS attoptions, 0 AS attcollation, "
- "NULL AS attfdwoptions "
- "FROM pg_attribute a LEFT JOIN pg_type t "
- "ON a.atttypid = t.oid "
- "WHERE a.attrelid = '%u'::oid "
- "AND a.attnum > 0::int2 "
- "ORDER BY a.attrelid, a.attnum",
- tbinfo->dobj.catId.oid);
- }
- else
- {
- /* format_type not available before 7.1 */
- appendPQExpBuffer(q, "SELECT attnum, attname, atttypmod, "
- "-1 AS attstattarget, "
- "attstorage, attstorage AS typstorage, "
- "attnotnull, atthasdef, false AS attisdropped, "
- "attlen, attalign, "
- "true AS attislocal, "
- "(SELECT typname FROM pg_type WHERE oid = atttypid) AS atttypname, "
- "'' AS attoptions, 0 AS attcollation, "
- "NULL AS attfdwoptions "
- "FROM pg_attribute a "
- "WHERE attrelid = '%u'::oid "
- "AND attnum > 0::int2 "
- "ORDER BY attrelid, attnum",
+ "ORDER BY a.attnum",
tbinfo->dobj.catId.oid);
}
@@ -7924,42 +7147,12 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
tbinfo->dobj.namespace->dobj.name,
tbinfo->dobj.name);
- resetPQExpBuffer(q);
- if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(q, "SELECT tableoid, oid, adnum, "
+ printfPQExpBuffer(q, "SELECT tableoid, oid, adnum, "
"pg_catalog.pg_get_expr(adbin, adrelid) AS adsrc "
- "FROM pg_catalog.pg_attrdef "
- "WHERE adrelid = '%u'::pg_catalog.oid",
- tbinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70200)
- {
- /* 7.2 did not have OIDs in pg_attrdef */
- appendPQExpBuffer(q, "SELECT tableoid, 0 AS oid, adnum, "
- "pg_get_expr(adbin, adrelid) AS adsrc "
- "FROM pg_attrdef "
- "WHERE adrelid = '%u'::oid",
- tbinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- /* no pg_get_expr, so must rely on adsrc */
- appendPQExpBuffer(q, "SELECT tableoid, oid, adnum, adsrc "
- "FROM pg_attrdef "
- "WHERE adrelid = '%u'::oid",
- tbinfo->dobj.catId.oid);
- }
- else
- {
- /* no pg_get_expr, no tableoid either */
- appendPQExpBuffer(q, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_attrdef') AS tableoid, "
- "oid, adnum, adsrc "
- "FROM pg_attrdef "
- "WHERE adrelid = '%u'::oid",
- tbinfo->dobj.catId.oid);
- }
+ "FROM pg_catalog.pg_attrdef "
+ "WHERE adrelid = '%u'::pg_catalog.oid",
+ tbinfo->dobj.catId.oid);
+
res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
numDefaults = PQntuples(res);
@@ -8005,17 +7198,11 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
if (tbinfo->relkind == RELKIND_VIEW)
{
attrdefs[j].separate = true;
- /* needed in case pre-7.3 DB: */
- addObjectDependency(&attrdefs[j].dobj,
- tbinfo->dobj.dumpId);
}
else if (!shouldPrintColumn(dopt, tbinfo, adnum - 1))
{
/* column will be suppressed, print default separately */
attrdefs[j].separate = true;
- /* needed in case pre-7.3 DB: */
- addObjectDependency(&attrdefs[j].dobj,
- tbinfo->dobj.dumpId);
}
else
{
@@ -8077,7 +7264,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"ORDER BY conname",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70400)
+ else
{
appendPQExpBuffer(q, "SELECT tableoid, oid, conname, "
"pg_catalog.pg_get_constraintdef(oid) AS consrc, "
@@ -8088,54 +7275,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
"ORDER BY conname",
tbinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
- {
- /* no pg_get_constraintdef, must use consrc */
- appendPQExpBuffer(q, "SELECT tableoid, oid, conname, "
- "'CHECK (' || consrc || ')' AS consrc, "
- "true AS conislocal, true AS convalidated "
- "FROM pg_catalog.pg_constraint "
- "WHERE conrelid = '%u'::pg_catalog.oid "
- " AND contype = 'c' "
- "ORDER BY conname",
- tbinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70200)
- {
- /* 7.2 did not have OIDs in pg_relcheck */
- appendPQExpBuffer(q, "SELECT tableoid, 0 AS oid, "
- "rcname AS conname, "
- "'CHECK (' || rcsrc || ')' AS consrc, "
- "true AS conislocal, true AS convalidated "
- "FROM pg_relcheck "
- "WHERE rcrelid = '%u'::oid "
- "ORDER BY rcname",
- tbinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(q, "SELECT tableoid, oid, "
- "rcname AS conname, "
- "'CHECK (' || rcsrc || ')' AS consrc, "
- "true AS conislocal, true AS convalidated "
- "FROM pg_relcheck "
- "WHERE rcrelid = '%u'::oid "
- "ORDER BY rcname",
- tbinfo->dobj.catId.oid);
- }
- else
- {
- /* no tableoid in 7.0 */
- appendPQExpBuffer(q, "SELECT "
- "(SELECT oid FROM pg_class WHERE relname = 'pg_relcheck') AS tableoid, "
- "oid, rcname AS conname, "
- "'CHECK (' || rcsrc || ')' AS consrc, "
- "true AS conislocal, true AS convalidated "
- "FROM pg_relcheck "
- "WHERE rcrelid = '%u'::oid "
- "ORDER BY rcname",
- tbinfo->dobj.catId.oid);
- }
+
res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
numConstrs = PQntuples(res);
@@ -8303,8 +7443,7 @@ getTSParsers(Archive *fout, int *numTSParsers)
prsinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_prsname));
prsinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_prsnamespace)),
- prsinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_prsnamespace)));
prsinfo[i].prsstart = atooid(PQgetvalue(res, i, i_prsstart));
prsinfo[i].prstoken = atooid(PQgetvalue(res, i, i_prstoken));
prsinfo[i].prsend = atooid(PQgetvalue(res, i, i_prsend));
@@ -8390,8 +7529,7 @@ getTSDictionaries(Archive *fout, int *numTSDicts)
dictinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_dictname));
dictinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_dictnamespace)),
- dictinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_dictnamespace)));
dictinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
dictinfo[i].dicttemplate = atooid(PQgetvalue(res, i, i_dicttemplate));
if (PQgetisnull(res, i, i_dictinitoption))
@@ -8474,8 +7612,7 @@ getTSTemplates(Archive *fout, int *numTSTemplates)
tmplinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_tmplname));
tmplinfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_tmplnamespace)),
- tmplinfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_tmplnamespace)));
tmplinfo[i].tmplinit = atooid(PQgetvalue(res, i, i_tmplinit));
tmplinfo[i].tmpllexize = atooid(PQgetvalue(res, i, i_tmpllexize));
@@ -8555,8 +7692,7 @@ getTSConfigurations(Archive *fout, int *numTSConfigs)
cfginfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_cfgname));
cfginfo[i].dobj.namespace =
findNamespace(fout,
- atooid(PQgetvalue(res, i, i_cfgnamespace)),
- cfginfo[i].dobj.catId.oid);
+ atooid(PQgetvalue(res, i, i_cfgnamespace)));
cfginfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
cfginfo[i].cfgparser = atooid(PQgetvalue(res, i, i_cfgparser));
@@ -8964,8 +8100,7 @@ getDefaultACLs(Archive *fout, int *numDefaultACLs)
daclinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_defaclobjtype));
if (nspid != InvalidOid)
- daclinfo[i].dobj.namespace = findNamespace(fout, nspid,
- daclinfo[i].dobj.catId.oid);
+ daclinfo[i].dobj.namespace = findNamespace(fout, nspid);
else
daclinfo[i].dobj.namespace = NULL;
@@ -9173,14 +8308,6 @@ findComments(Archive *fout, Oid classoid, Oid objoid,
if (ncomments < 0)
ncomments = collectComments(fout, &comments);
- /*
- * Pre-7.2, pg_description does not contain classoid, so collectComments
- * just stores a zero. If there's a collision on object OID, well, you
- * get duplicate comments.
- */
- if (fout->remoteVersion < 70200)
- classoid = 0;
-
/*
* Do binary search to find some item matching the object.
*/
@@ -9268,25 +8395,9 @@ collectComments(Archive *fout, CommentItem **items)
query = createPQExpBuffer();
- if (fout->remoteVersion >= 70300)
- {
- appendPQExpBufferStr(query, "SELECT description, classoid, objoid, objsubid "
- "FROM pg_catalog.pg_description "
- "ORDER BY classoid, objoid, objsubid");
- }
- else if (fout->remoteVersion >= 70200)
- {
- appendPQExpBufferStr(query, "SELECT description, classoid, objoid, objsubid "
- "FROM pg_description "
- "ORDER BY classoid, objoid, objsubid");
- }
- else
- {
- /* Note: this will fail to find attribute comments in pre-7.2... */
- appendPQExpBufferStr(query, "SELECT description, 0 AS classoid, objoid, 0 AS objsubid "
- "FROM pg_description "
- "ORDER BY objoid");
- }
+ appendPQExpBufferStr(query, "SELECT description, classoid, objoid, objsubid "
+ "FROM pg_catalog.pg_description "
+ "ORDER BY classoid, objoid, objsubid");
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -9468,10 +8579,6 @@ dumpNamespace(Archive *fout, NamespaceInfo *nspinfo)
if (!nspinfo->dobj.dump || dopt->dataOnly)
return;
- /* don't dump dummy namespace from pre-7.3 source */
- if (strlen(nspinfo->dobj.name) == 0)
- return;
-
q = createPQExpBuffer();
delq = createPQExpBuffer();
labelq = createPQExpBuffer();
@@ -10089,71 +9196,35 @@ dumpBaseType(Archive *fout, TypeInfo *tyinfo)
"WHERE oid = '%u'::pg_catalog.oid",
tyinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 80400)
- {
- appendPQExpBuffer(query, "SELECT typlen, "
- "typinput, typoutput, typreceive, typsend, "
- "typmodin, typmodout, typanalyze, "
- "typreceive::pg_catalog.oid AS typreceiveoid, "
- "typsend::pg_catalog.oid AS typsendoid, "
- "typmodin::pg_catalog.oid AS typmodinoid, "
- "typmodout::pg_catalog.oid AS typmodoutoid, "
- "typanalyze::pg_catalog.oid AS typanalyzeoid, "
- "typcategory, typispreferred, "
- "typdelim, typbyval, typalign, typstorage, "
- "false AS typcollatable, "
- "pg_catalog.pg_get_expr(typdefaultbin, 0) AS typdefaultbin, typdefault "
- "FROM pg_catalog.pg_type "
- "WHERE oid = '%u'::pg_catalog.oid",
- tyinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 80300)
- {
- /* Before 8.4, pg_get_expr does not allow 0 for its second arg */
- appendPQExpBuffer(query, "SELECT typlen, "
- "typinput, typoutput, typreceive, typsend, "
- "typmodin, typmodout, typanalyze, "
- "typreceive::pg_catalog.oid AS typreceiveoid, "
- "typsend::pg_catalog.oid AS typsendoid, "
- "typmodin::pg_catalog.oid AS typmodinoid, "
- "typmodout::pg_catalog.oid AS typmodoutoid, "
- "typanalyze::pg_catalog.oid AS typanalyzeoid, "
- "'U' AS typcategory, false AS typispreferred, "
- "typdelim, typbyval, typalign, typstorage, "
- "false AS typcollatable, "
- "pg_catalog.pg_get_expr(typdefaultbin, 'pg_catalog.pg_type'::pg_catalog.regclass) AS typdefaultbin, typdefault "
- "FROM pg_catalog.pg_type "
- "WHERE oid = '%u'::pg_catalog.oid",
- tyinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 80000)
+ else if (fout->remoteVersion >= 80400)
{
appendPQExpBuffer(query, "SELECT typlen, "
"typinput, typoutput, typreceive, typsend, "
- "'-' AS typmodin, '-' AS typmodout, "
- "typanalyze, "
+ "typmodin, typmodout, typanalyze, "
"typreceive::pg_catalog.oid AS typreceiveoid, "
"typsend::pg_catalog.oid AS typsendoid, "
- "0 AS typmodinoid, 0 AS typmodoutoid, "
+ "typmodin::pg_catalog.oid AS typmodinoid, "
+ "typmodout::pg_catalog.oid AS typmodoutoid, "
"typanalyze::pg_catalog.oid AS typanalyzeoid, "
- "'U' AS typcategory, false AS typispreferred, "
+ "typcategory, typispreferred, "
"typdelim, typbyval, typalign, typstorage, "
"false AS typcollatable, "
- "pg_catalog.pg_get_expr(typdefaultbin, 'pg_catalog.pg_type'::pg_catalog.regclass) AS typdefaultbin, typdefault "
+ "pg_catalog.pg_get_expr(typdefaultbin, 0) AS typdefaultbin, typdefault "
"FROM pg_catalog.pg_type "
"WHERE oid = '%u'::pg_catalog.oid",
tyinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70400)
+ else if (fout->remoteVersion >= 80300)
{
+ /* Before 8.4, pg_get_expr does not allow 0 for its second arg */
appendPQExpBuffer(query, "SELECT typlen, "
"typinput, typoutput, typreceive, typsend, "
- "'-' AS typmodin, '-' AS typmodout, "
- "'-' AS typanalyze, "
+ "typmodin, typmodout, typanalyze, "
"typreceive::pg_catalog.oid AS typreceiveoid, "
"typsend::pg_catalog.oid AS typsendoid, "
- "0 AS typmodinoid, 0 AS typmodoutoid, "
- "0 AS typanalyzeoid, "
+ "typmodin::pg_catalog.oid AS typmodinoid, "
+ "typmodout::pg_catalog.oid AS typmodoutoid, "
+ "typanalyze::pg_catalog.oid AS typanalyzeoid, "
"'U' AS typcategory, false AS typispreferred, "
"typdelim, typbyval, typalign, typstorage, "
"false AS typcollatable, "
@@ -10162,16 +9233,16 @@ dumpBaseType(Archive *fout, TypeInfo *tyinfo)
"WHERE oid = '%u'::pg_catalog.oid",
tyinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBuffer(query, "SELECT typlen, "
- "typinput, typoutput, "
- "'-' AS typreceive, '-' AS typsend, "
+ "typinput, typoutput, typreceive, typsend, "
"'-' AS typmodin, '-' AS typmodout, "
- "'-' AS typanalyze, "
- "0 AS typreceiveoid, 0 AS typsendoid, "
+ "typanalyze, "
+ "typreceive::pg_catalog.oid AS typreceiveoid, "
+ "typsend::pg_catalog.oid AS typsendoid, "
"0 AS typmodinoid, 0 AS typmodoutoid, "
- "0 AS typanalyzeoid, "
+ "typanalyze::pg_catalog.oid AS typanalyzeoid, "
"'U' AS typcategory, false AS typispreferred, "
"typdelim, typbyval, typalign, typstorage, "
"false AS typcollatable, "
@@ -10180,69 +9251,6 @@ dumpBaseType(Archive *fout, TypeInfo *tyinfo)
"WHERE oid = '%u'::pg_catalog.oid",
tyinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70200)
- {
- /*
- * Note: although pre-7.3 catalogs contain typreceive and typsend,
- * ignore them because they are not right.
- */
- appendPQExpBuffer(query, "SELECT typlen, "
- "typinput, typoutput, "
- "'-' AS typreceive, '-' AS typsend, "
- "'-' AS typmodin, '-' AS typmodout, "
- "'-' AS typanalyze, "
- "0 AS typreceiveoid, 0 AS typsendoid, "
- "0 AS typmodinoid, 0 AS typmodoutoid, "
- "0 AS typanalyzeoid, "
- "'U' AS typcategory, false AS typispreferred, "
- "typdelim, typbyval, typalign, typstorage, "
- "false AS typcollatable, "
- "NULL AS typdefaultbin, typdefault "
- "FROM pg_type "
- "WHERE oid = '%u'::oid",
- tyinfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- /*
- * Ignore pre-7.2 typdefault; the field exists but has an unusable
- * representation.
- */
- appendPQExpBuffer(query, "SELECT typlen, "
- "typinput, typoutput, "
- "'-' AS typreceive, '-' AS typsend, "
- "'-' AS typmodin, '-' AS typmodout, "
- "'-' AS typanalyze, "
- "0 AS typreceiveoid, 0 AS typsendoid, "
- "0 AS typmodinoid, 0 AS typmodoutoid, "
- "0 AS typanalyzeoid, "
- "'U' AS typcategory, false AS typispreferred, "
- "typdelim, typbyval, typalign, typstorage, "
- "false AS typcollatable, "
- "NULL AS typdefaultbin, NULL AS typdefault "
- "FROM pg_type "
- "WHERE oid = '%u'::oid",
- tyinfo->dobj.catId.oid);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT typlen, "
- "typinput, typoutput, "
- "'-' AS typreceive, '-' AS typsend, "
- "'-' AS typmodin, '-' AS typmodout, "
- "'-' AS typanalyze, "
- "0 AS typreceiveoid, 0 AS typsendoid, "
- "0 AS typmodinoid, 0 AS typmodoutoid, "
- "0 AS typanalyzeoid, "
- "'U' AS typcategory, false AS typispreferred, "
- "typdelim, typbyval, typalign, "
- "'p'::char AS typstorage, "
- "false AS typcollatable, "
- "NULL AS typdefaultbin, NULL AS typdefault "
- "FROM pg_type "
- "WHERE oid = '%u'::oid",
- tyinfo->dobj.catId.oid);
- }
res = ExecuteSqlQueryForSingleRow(fout, query->data);
@@ -10300,30 +9308,19 @@ dumpBaseType(Archive *fout, TypeInfo *tyinfo)
qtypname,
(strcmp(typlen, "-1") == 0) ? "variable" : typlen);
- if (fout->remoteVersion >= 70300)
- {
- /* regproc result is correctly quoted as of 7.3 */
- appendPQExpBuffer(q, ",\n INPUT = %s", typinput);
- appendPQExpBuffer(q, ",\n OUTPUT = %s", typoutput);
- if (OidIsValid(typreceiveoid))
- appendPQExpBuffer(q, ",\n RECEIVE = %s", typreceive);
- if (OidIsValid(typsendoid))
- appendPQExpBuffer(q, ",\n SEND = %s", typsend);
- if (OidIsValid(typmodinoid))
- appendPQExpBuffer(q, ",\n TYPMOD_IN = %s", typmodin);
- if (OidIsValid(typmodoutoid))
- appendPQExpBuffer(q, ",\n TYPMOD_OUT = %s", typmodout);
- if (OidIsValid(typanalyzeoid))
- appendPQExpBuffer(q, ",\n ANALYZE = %s", typanalyze);
- }
- else
- {
- /* regproc delivers an unquoted name before 7.3 */
- /* cannot combine these because fmtId uses static result area */
- appendPQExpBuffer(q, ",\n INPUT = %s", fmtId(typinput));
- appendPQExpBuffer(q, ",\n OUTPUT = %s", fmtId(typoutput));
- /* receive/send/typmodin/typmodout/analyze need not be printed */
- }
+ /* regproc result is sufficiently quoted already */
+ appendPQExpBuffer(q, ",\n INPUT = %s", typinput);
+ appendPQExpBuffer(q, ",\n OUTPUT = %s", typoutput);
+ if (OidIsValid(typreceiveoid))
+ appendPQExpBuffer(q, ",\n RECEIVE = %s", typreceive);
+ if (OidIsValid(typsendoid))
+ appendPQExpBuffer(q, ",\n SEND = %s", typsend);
+ if (OidIsValid(typmodinoid))
+ appendPQExpBuffer(q, ",\n TYPMOD_IN = %s", typmodin);
+ if (OidIsValid(typmodoutoid))
+ appendPQExpBuffer(q, ",\n TYPMOD_OUT = %s", typmodout);
+ if (OidIsValid(typanalyzeoid))
+ appendPQExpBuffer(q, ",\n ANALYZE = %s", typanalyze);
if (strcmp(typcollatable, "t") == 0)
appendPQExpBufferStr(q, ",\n COLLATABLE = true");
@@ -10468,7 +9465,6 @@ dumpDomain(Archive *fout, TypeInfo *tyinfo)
}
else
{
- /* We assume here that remoteVersion must be at least 70300 */
appendPQExpBuffer(query, "SELECT typnotnull, "
"pg_catalog.format_type(typbasetype, typtypmod) AS typdefn, "
"pg_catalog.pg_get_expr(typdefaultbin, 'pg_catalog.pg_type'::pg_catalog.regclass) AS typdefaultbin, "
@@ -10669,9 +9665,8 @@ dumpCompositeType(Archive *fout, TypeInfo *tyinfo)
else
{
/*
- * We assume here that remoteVersion must be at least 70300. Since
- * ALTER TYPE could not drop columns until 9.1, attisdropped should
- * always be false.
+ * Since ALTER TYPE could not drop columns until 9.1, attisdropped
+ * should always be false.
*/
appendPQExpBuffer(query, "SELECT a.attname, "
"pg_catalog.format_type(a.atttypid, a.atttypmod) AS atttypdefn, "
@@ -10858,7 +9853,6 @@ dumpCompositeTypeColComments(Archive *fout, TypeInfo *tyinfo)
query = createPQExpBuffer();
- /* We assume here that remoteVersion must be at least 70300 */
appendPQExpBuffer(query,
"SELECT c.tableoid, a.attname, a.attnum "
"FROM pg_catalog.pg_class c, pg_catalog.pg_attribute a "
@@ -11442,7 +10436,7 @@ dumpFunc(Archive *fout, FuncInfo *finfo)
"WHERE oid = '%u'::pg_catalog.oid",
finfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 80000)
+ else
{
appendPQExpBuffer(query,
"SELECT proretset, prosrc, probin, "
@@ -11458,58 +10452,6 @@ dumpFunc(Archive *fout, FuncInfo *finfo)
"WHERE oid = '%u'::pg_catalog.oid",
finfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(query,
- "SELECT proretset, prosrc, probin, "
- "null AS proallargtypes, "
- "null AS proargmodes, "
- "null AS proargnames, "
- "false AS proiswindow, "
- "provolatile, proisstrict, prosecdef, "
- "false AS proleakproof, "
- "null AS proconfig, 0 AS procost, 0 AS prorows, "
- "(SELECT lanname FROM pg_catalog.pg_language WHERE oid = prolang) AS lanname "
- "FROM pg_catalog.pg_proc "
- "WHERE oid = '%u'::pg_catalog.oid",
- finfo->dobj.catId.oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query,
- "SELECT proretset, prosrc, probin, "
- "null AS proallargtypes, "
- "null AS proargmodes, "
- "null AS proargnames, "
- "false AS proiswindow, "
- "case when proiscachable then 'i' else 'v' end AS provolatile, "
- "proisstrict, "
- "false AS prosecdef, "
- "false AS proleakproof, "
- "null AS proconfig, 0 AS procost, 0 AS prorows, "
- "(SELECT lanname FROM pg_language WHERE oid = prolang) AS lanname "
- "FROM pg_proc "
- "WHERE oid = '%u'::oid",
- finfo->dobj.catId.oid);
- }
- else
- {
- appendPQExpBuffer(query,
- "SELECT proretset, prosrc, probin, "
- "null AS proallargtypes, "
- "null AS proargmodes, "
- "null AS proargnames, "
- "false AS proiswindow, "
- "CASE WHEN proiscachable THEN 'i' ELSE 'v' END AS provolatile, "
- "false AS proisstrict, "
- "false AS prosecdef, "
- "false AS proleakproof, "
- "NULL AS proconfig, 0 AS procost, 0 AS prorows, "
- "(SELECT lanname FROM pg_language WHERE oid = prolang) AS lanname "
- "FROM pg_proc "
- "WHERE oid = '%u'::oid",
- finfo->dobj.catId.oid);
- }
res = ExecuteSqlQueryForSingleRow(fout, query->data);
@@ -12082,7 +11024,6 @@ dumpOpr(Archive *fout, OprInfo *oprinfo)
PQExpBuffer labelq;
PQExpBuffer oprid;
PQExpBuffer details;
- const char *name;
PGresult *res;
int i_oprkind;
int i_oprcode;
@@ -12143,7 +11084,7 @@ dumpOpr(Archive *fout, OprInfo *oprinfo)
"WHERE oid = '%u'::pg_catalog.oid",
oprinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBuffer(query, "SELECT oprkind, "
"oprcode::pg_catalog.regprocedure, "
@@ -12159,34 +11100,6 @@ dumpOpr(Archive *fout, OprInfo *oprinfo)
"WHERE oid = '%u'::pg_catalog.oid",
oprinfo->dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT oprkind, oprcode, "
- "CASE WHEN oprleft = 0 THEN '-' "
- "ELSE format_type(oprleft, NULL) END AS oprleft, "
- "CASE WHEN oprright = 0 THEN '-' "
- "ELSE format_type(oprright, NULL) END AS oprright, "
- "oprcom, oprnegate, oprrest, oprjoin, "
- "(oprlsortop != 0) AS oprcanmerge, "
- "oprcanhash "
- "FROM pg_operator "
- "WHERE oid = '%u'::oid",
- oprinfo->dobj.catId.oid);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT oprkind, oprcode, "
- "CASE WHEN oprleft = 0 THEN '-'::name "
- "ELSE (SELECT typname FROM pg_type WHERE oid = oprleft) END AS oprleft, "
- "CASE WHEN oprright = 0 THEN '-'::name "
- "ELSE (SELECT typname FROM pg_type WHERE oid = oprright) END AS oprright, "
- "oprcom, oprnegate, oprrest, oprjoin, "
- "(oprlsortop != 0) AS oprcanmerge, "
- "oprcanhash "
- "FROM pg_operator "
- "WHERE oid = '%u'::oid",
- oprinfo->dobj.catId.oid);
- }
res = ExecuteSqlQueryForSingleRow(fout, query->data);
@@ -12229,12 +11142,8 @@ dumpOpr(Archive *fout, OprInfo *oprinfo)
if (strcmp(oprkind, "r") == 0 ||
strcmp(oprkind, "b") == 0)
{
- if (fout->remoteVersion >= 70100)
- name = oprleft;
- else
- name = fmtId(oprleft);
- appendPQExpBuffer(details, ",\n LEFTARG = %s", name);
- appendPQExpBufferStr(oprid, name);
+ appendPQExpBuffer(details, ",\n LEFTARG = %s", oprleft);
+ appendPQExpBufferStr(oprid, oprleft);
}
else
appendPQExpBufferStr(oprid, "NONE");
@@ -12242,12 +11151,8 @@ dumpOpr(Archive *fout, OprInfo *oprinfo)
if (strcmp(oprkind, "l") == 0 ||
strcmp(oprkind, "b") == 0)
{
- if (fout->remoteVersion >= 70100)
- name = oprright;
- else
- name = fmtId(oprright);
- appendPQExpBuffer(details, ",\n RIGHTARG = %s", name);
- appendPQExpBuffer(oprid, ", %s)", name);
+ appendPQExpBuffer(details, ",\n RIGHTARG = %s", oprright);
+ appendPQExpBuffer(oprid, ", %s)", oprright);
}
else
appendPQExpBufferStr(oprid, ", NONE)");
@@ -12334,40 +11239,34 @@ dumpOpr(Archive *fout, OprInfo *oprinfo)
 * Returns allocated string of what to print, or NULL if the function
 * reference is InvalidOid. Returned string is expected to be free'd by the caller.
*
- * In 7.3 the input is a REGPROCEDURE display; we have to strip the
- * argument-types part. In prior versions, the input is a REGPROC display.
+ * The input is a REGPROCEDURE display; we have to strip the argument-types
+ * part.
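+ * For example, a (hypothetical) display of public.myfunc(integer,text)
+ * is reduced to public.myfunc.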
*/
static char *
convertRegProcReference(Archive *fout, const char *proc)
{
+ char *name;
+ char *paren;
+ bool inquote;
+
/* In all cases "-" means a null reference */
if (strcmp(proc, "-") == 0)
return NULL;
- if (fout->remoteVersion >= 70300)
+ name = pg_strdup(proc);
+ /* find non-double-quoted left paren */
+ inquote = false;
+ for (paren = name; *paren; paren++)
{
- char *name;
- char *paren;
- bool inquote;
-
- name = pg_strdup(proc);
- /* find non-double-quoted left paren */
- inquote = false;
- for (paren = name; *paren; paren++)
+ if (*paren == '(' && !inquote)
{
- if (*paren == '(' && !inquote)
- {
- *paren = '\0';
- break;
- }
- if (*paren == '"')
- inquote = !inquote;
+ *paren = '\0';
+ break;
}
- return name;
+ if (*paren == '"')
+ inquote = !inquote;
}
-
- /* REGPROC before 7.3 does not quote its result */
- return pg_strdup(fmtId(proc));
+ return name;
}
/*
@@ -12376,60 +11275,44 @@ convertRegProcReference(Archive *fout, const char *proc)
* Returns an allocated string of what to print, or NULL to print nothing.
* Caller is responsible for free'ing result string.
*
- * In 7.3 and up the input is a REGOPERATOR display; we have to strip the
- * argument-types part, and add OPERATOR() decoration if the name is
- * schema-qualified. In older versions, the input is just a numeric OID,
- * which we search our operator list for.
+ * The input is a REGOPERATOR display; we have to strip the argument-types
+ * part, and add OPERATOR() decoration if the name is schema-qualified.
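+ * For example, a (hypothetical) display of myschema.+(integer,integer)
+ * comes out as OPERATOR(myschema.+).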
*/
static char *
convertOperatorReference(Archive *fout, const char *opr)
{
- OprInfo *oprInfo;
+ char *name;
+ char *oname;
+ char *ptr;
+ bool inquote;
+ bool sawdot;
/* In all cases "0" means a null reference */
if (strcmp(opr, "0") == 0)
return NULL;
- if (fout->remoteVersion >= 70300)
- {
- char *name;
- char *oname;
- char *ptr;
- bool inquote;
- bool sawdot;
-
- name = pg_strdup(opr);
- /* find non-double-quoted left paren, and check for non-quoted dot */
- inquote = false;
- sawdot = false;
- for (ptr = name; *ptr; ptr++)
+ name = pg_strdup(opr);
+ /* find non-double-quoted left paren, and check for non-quoted dot */
+ inquote = false;
+ sawdot = false;
+ for (ptr = name; *ptr; ptr++)
+ {
+ if (*ptr == '"')
+ inquote = !inquote;
+ else if (*ptr == '.' && !inquote)
+ sawdot = true;
+ else if (*ptr == '(' && !inquote)
{
- if (*ptr == '"')
- inquote = !inquote;
- else if (*ptr == '.' && !inquote)
- sawdot = true;
- else if (*ptr == '(' && !inquote)
- {
- *ptr = '\0';
- break;
- }
+ *ptr = '\0';
+ break;
}
- /* If not schema-qualified, don't need to add OPERATOR() */
- if (!sawdot)
- return name;
- oname = psprintf("OPERATOR(%s)", name);
- free(name);
- return oname;
- }
-
- oprInfo = findOprByOid(atooid(opr));
- if (oprInfo == NULL)
- {
- write_msg(NULL, "WARNING: could not find operator with OID %s\n",
- opr);
- return NULL;
}
- return pg_strdup(oprInfo->dobj.name);
+ /* If not schema-qualified, don't need to add OPERATOR() */
+ if (!sawdot)
+ return name;
+ oname = psprintf("OPERATOR(%s)", name);
+ free(name);
+ return oname;
}
/*
@@ -12586,14 +11469,6 @@ dumpOpclass(Archive *fout, OpclassInfo *opcinfo)
if (!opcinfo->dobj.dump || dopt->dataOnly)
return;
- /*
- * XXX currently we do not implement dumping of operator classes from
- * pre-7.3 databases. This could be done but it seems not worth the
- * trouble.
- */
- if (fout->remoteVersion < 70300)
- return;
-
query = createPQExpBuffer();
q = createPQExpBuffer();
delq = createPQExpBuffer();
@@ -13343,7 +12218,7 @@ dumpConversion(Archive *fout, ConvInfo *convinfo)
appendStringLiteralAH(q, conforencoding, fout);
appendPQExpBufferStr(q, " TO ");
appendStringLiteralAH(q, contoencoding, fout);
- /* regproc is automatically quoted in 7.3 and above */
+ /* regproc output is already sufficiently quoted */
appendPQExpBuffer(q, " FROM %s;\n", conproc);
appendPQExpBuffer(labelq, "CONVERSION %s", fmtId(convinfo->dobj.name));
@@ -13569,7 +12444,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo)
"AND p.oid = '%u'::pg_catalog.oid",
agginfo->aggfn.dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70300)
+ else
{
appendPQExpBuffer(query, "SELECT aggtransfn, "
"aggfinalfn, aggtranstype::pg_catalog.regtype, "
@@ -13587,41 +12462,6 @@ dumpAgg(Archive *fout, AggInfo *agginfo)
"AND p.oid = '%u'::pg_catalog.oid",
agginfo->aggfn.dobj.catId.oid);
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT aggtransfn, aggfinalfn, "
- "format_type(aggtranstype, NULL) AS aggtranstype, "
- "'-' AS aggcombinefn, '-' AS aggserialfn, "
- "'-' AS aggdeserialfn, '-' AS aggmtransfn, "
- "'-' AS aggminvtransfn, '-' AS aggmfinalfn, "
- "0 AS aggmtranstype, false AS aggfinalextra, "
- "false AS aggmfinalextra, 0 AS aggsortop, "
- "false AS hypothetical, "
- "0 AS aggtransspace, agginitval, "
- "0 AS aggmtransspace, NULL AS aggminitval, "
- "true AS convertok "
- "FROM pg_aggregate "
- "WHERE oid = '%u'::oid",
- agginfo->aggfn.dobj.catId.oid);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT aggtransfn1 AS aggtransfn, "
- "aggfinalfn, "
- "(SELECT typname FROM pg_type WHERE oid = aggtranstype1) AS aggtranstype, "
- "'-' AS aggcombinefn, '-' AS aggserialfn, "
- "'-' AS aggdeserialfn, '-' AS aggmtransfn, "
- "'-' AS aggminvtransfn, '-' AS aggmfinalfn, "
- "0 AS aggmtranstype, false AS aggfinalextra, "
- "false AS aggmfinalextra, 0 AS aggsortop, "
- "false AS hypothetical, "
- "0 AS aggtransspace, agginitval1 AS agginitval, "
- "0 AS aggmtransspace, NULL AS aggminitval, "
- "(aggtransfn2 = 0 and aggtranstype2 = 0 and agginitval2 is null) AS convertok "
- "FROM pg_aggregate "
- "WHERE oid = '%u'::oid",
- agginfo->aggfn.dobj.catId.oid);
- }
res = ExecuteSqlQueryForSingleRow(fout, query->data);
@@ -13701,28 +12541,9 @@ dumpAgg(Archive *fout, AggInfo *agginfo)
return;
}
- if (fout->remoteVersion >= 70300)
- {
- /* If using 7.3's regproc or regtype, data is already quoted */
- appendPQExpBuffer(details, " SFUNC = %s,\n STYPE = %s",
- aggtransfn,
- aggtranstype);
- }
- else if (fout->remoteVersion >= 70100)
- {
- /* format_type quotes, regproc does not */
- appendPQExpBuffer(details, " SFUNC = %s,\n STYPE = %s",
- fmtId(aggtransfn),
- aggtranstype);
- }
- else
- {
- /* need quotes all around */
- appendPQExpBuffer(details, " SFUNC = %s,\n",
- fmtId(aggtransfn));
- appendPQExpBuffer(details, " STYPE = %s",
- fmtId(aggtranstype));
- }
+ /* regproc and regtype output is already sufficiently quoted */
+ appendPQExpBuffer(details, " SFUNC = %s,\n STYPE = %s",
+ aggtransfn, aggtranstype);
if (strcmp(aggtransspace, "0") != 0)
{
@@ -15121,19 +13942,9 @@ createViewAsClause(Archive *fout, TableInfo *tbinfo)
int len;
/* Fetch the view definition */
- if (fout->remoteVersion >= 70300)
- {
- /* Beginning in 7.3, viewname is not unique; rely on OID */
- appendPQExpBuffer(query,
+ appendPQExpBuffer(query,
"SELECT pg_catalog.pg_get_viewdef('%u'::pg_catalog.oid) AS viewdef",
- tbinfo->dobj.catId.oid);
- }
- else
- {
- appendPQExpBufferStr(query, "SELECT definition AS viewdef "
- "FROM pg_views WHERE viewname = ");
- appendStringLiteralAH(query, tbinfo->dobj.name, fout);
- }
+ tbinfo->dobj.catId.oid);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -15370,17 +14181,10 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
{
appendPQExpBufferStr(q, " WITH OPTIONS");
}
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(q, " %s",
- tbinfo->atttypnames[j]);
- }
else
{
- /* If no format_type, fake it */
appendPQExpBuffer(q, " %s",
- myFormatType(tbinfo->atttypnames[j],
- tbinfo->atttypmod[j]));
+ tbinfo->atttypnames[j]);
}
/* Add collation if not default for the type */
@@ -16283,52 +15087,6 @@ dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo)
destroyPQExpBuffer(labelq);
}
-/*
- * findLastBuiltInOid -
- * find the last built in oid
- *
- * For 7.1 and 7.2, we do this by retrieving datlastsysoid from the
- * pg_database entry for the current database
- */
-static Oid
-findLastBuiltinOid_V71(Archive *fout, const char *dbname)
-{
- PGresult *res;
- Oid last_oid;
- PQExpBuffer query = createPQExpBuffer();
-
- resetPQExpBuffer(query);
- appendPQExpBufferStr(query, "SELECT datlastsysoid from pg_database where datname = ");
- appendStringLiteralAH(query, dbname, fout);
-
- res = ExecuteSqlQueryForSingleRow(fout, query->data);
- last_oid = atooid(PQgetvalue(res, 0, PQfnumber(res, "datlastsysoid")));
- PQclear(res);
- destroyPQExpBuffer(query);
- return last_oid;
-}
-
-/*
- * findLastBuiltInOid -
- * find the last built in oid
- *
- * For 7.0, we do this by assuming that the last thing that initdb does is to
- * create the pg_indexes view. This sucks in general, but seeing that 7.0.x
- * initdb won't be changing anymore, it'll do.
- */
-static Oid
-findLastBuiltinOid_V70(Archive *fout)
-{
- PGresult *res;
- int last_oid;
-
- res = ExecuteSqlQueryForSingleRow(fout,
- "SELECT oid FROM pg_class WHERE relname = 'pg_indexes'");
- last_oid = atooid(PQgetvalue(res, 0, PQfnumber(res, "oid")));
- PQclear(res);
- return last_oid;
-}
-
/*
* dumpSequence
* write the declaration (not data) of one user-defined sequence
@@ -16703,13 +15461,9 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo)
{
if (OidIsValid(tginfo->tgconstrrelid))
{
- /* If we are using regclass, name is already quoted */
- if (fout->remoteVersion >= 70300)
- appendPQExpBuffer(query, " FROM %s\n ",
- tginfo->tgconstrrelname);
- else
- appendPQExpBuffer(query, " FROM %s\n ",
- fmtId(tginfo->tgconstrrelname));
+ /* regclass output is already quoted */
+ appendPQExpBuffer(query, " FROM %s\n ",
+ tginfo->tgconstrrelname);
}
if (!tginfo->tgdeferrable)
appendPQExpBufferStr(query, "NOT ");
@@ -16725,13 +15479,9 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo)
else
appendPQExpBufferStr(query, " FOR EACH STATEMENT\n ");
- /* In 7.3, result of regproc is already quoted */
- if (fout->remoteVersion >= 70300)
- appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(",
- tginfo->tgfname);
- else
- appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(",
- fmtId(tginfo->tgfname));
+ /* regproc output is already sufficiently quoted */
+ appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(",
+ tginfo->tgfname);
tgargs = (char *) PQunescapeBytea((unsigned char *) tginfo->tgargs,
&lentgargs);
@@ -16923,19 +15673,9 @@ dumpRule(Archive *fout, RuleInfo *rinfo)
delcmd = createPQExpBuffer();
labelq = createPQExpBuffer();
- if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(query,
- "SELECT pg_catalog.pg_get_ruledef('%u'::pg_catalog.oid) AS definition",
- rinfo->dobj.catId.oid);
- }
- else
- {
- /* Rule name was unique before 7.3 ... */
- appendPQExpBuffer(query,
- "SELECT pg_get_ruledef('%s') AS definition",
- rinfo->dobj.name);
- }
+ appendPQExpBuffer(query,
+ "SELECT pg_catalog.pg_get_ruledef('%u'::pg_catalog.oid) AS definition",
+ rinfo->dobj.catId.oid);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -17325,10 +16065,6 @@ getDependencies(Archive *fout)
DumpableObject *dobj,
*refdobj;
- /* No dependency info available before 7.3 */
- if (fout->remoteVersion < 70300)
- return;
-
if (g_verbose)
write_msg(NULL, "reading dependency data\n");
@@ -17680,10 +16416,6 @@ selectSourceSchema(Archive *fout, const char *schemaName)
/* This is checked by the callers already */
Assert(schemaName != NULL && *schemaName != '\0');
- /* Not relevant if fetching from pre-7.3 DB */
- if (fout->remoteVersion < 70300)
- return;
-
query = createPQExpBuffer();
appendPQExpBuffer(query, "SET search_path = %s",
fmtId(schemaName));
@@ -17699,8 +16431,8 @@ selectSourceSchema(Archive *fout, const char *schemaName)
* getFormattedTypeName - retrieve a nicely-formatted type name for the
 * given type OID.
*
- * NB: in 7.3 and up the result may depend on the currently-selected
- * schema; this is why we don't try to cache the names.
+ * NB: the result may depend on the currently-selected search_path; this is
+ * why we don't try to cache the names.
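+ * (For instance, a hypothetical type myschema.mytype is formatted as just
+ * mytype while myschema is in the search_path, and as myschema.mytype
+ * otherwise.)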
*/
static char *
getFormattedTypeName(Archive *fout, Oid oid, OidOptions opts)
@@ -17722,36 +16454,13 @@ getFormattedTypeName(Archive *fout, Oid oid, OidOptions opts)
}
query = createPQExpBuffer();
- if (fout->remoteVersion >= 70300)
- {
- appendPQExpBuffer(query, "SELECT pg_catalog.format_type('%u'::pg_catalog.oid, NULL)",
- oid);
- }
- else if (fout->remoteVersion >= 70100)
- {
- appendPQExpBuffer(query, "SELECT format_type('%u'::oid, NULL)",
- oid);
- }
- else
- {
- appendPQExpBuffer(query, "SELECT typname "
- "FROM pg_type "
- "WHERE oid = '%u'::oid",
- oid);
- }
+ appendPQExpBuffer(query, "SELECT pg_catalog.format_type('%u'::pg_catalog.oid, NULL)",
+ oid);
res = ExecuteSqlQueryForSingleRow(fout, query->data);
- if (fout->remoteVersion >= 70100)
- {
- /* already quoted */
- result = pg_strdup(PQgetvalue(res, 0, 0));
- }
- else
- {
- /* may need to quote it */
- result = pg_strdup(fmtId(PQgetvalue(res, 0, 0)));
- }
+ /* result of format_type is already quoted */
+ result = pg_strdup(PQgetvalue(res, 0, 0));
PQclear(res);
destroyPQExpBuffer(query);
@@ -17759,76 +16468,6 @@ getFormattedTypeName(Archive *fout, Oid oid, OidOptions opts)
return result;
}
-/*
- * myFormatType --- local implementation of format_type for use with 7.0.
- */
-static char *
-myFormatType(const char *typname, int32 typmod)
-{
- char *result;
- bool isarray = false;
- PQExpBuffer buf = createPQExpBuffer();
-
- /* Handle array types */
- if (typname[0] == '_')
- {
- isarray = true;
- typname++;
- }
-
- /* Show lengths on bpchar and varchar */
- if (strcmp(typname, "bpchar") == 0)
- {
- int len = (typmod - VARHDRSZ);
-
- appendPQExpBufferStr(buf, "character");
- if (len > 1)
- appendPQExpBuffer(buf, "(%d)",
- typmod - VARHDRSZ);
- }
- else if (strcmp(typname, "varchar") == 0)
- {
- appendPQExpBufferStr(buf, "character varying");
- if (typmod != -1)
- appendPQExpBuffer(buf, "(%d)",
- typmod - VARHDRSZ);
- }
- else if (strcmp(typname, "numeric") == 0)
- {
- appendPQExpBufferStr(buf, "numeric");
- if (typmod != -1)
- {
- int32 tmp_typmod;
- int precision;
- int scale;
-
- tmp_typmod = typmod - VARHDRSZ;
- precision = (tmp_typmod >> 16) & 0xffff;
- scale = tmp_typmod & 0xffff;
- appendPQExpBuffer(buf, "(%d,%d)",
- precision, scale);
- }
- }
-
- /*
- * char is an internal single-byte data type; Let's make sure we force it
- * through with quotes. - thomas 1998-12-13
- */
- else if (strcmp(typname, "char") == 0)
- appendPQExpBufferStr(buf, "\"char\"");
- else
- appendPQExpBufferStr(buf, fmtId(typname));
-
- /* Append array qualifier for array types */
- if (isarray)
- appendPQExpBufferStr(buf, "[]");
-
- result = pg_strdup(buf->data);
- destroyPQExpBuffer(buf);
-
- return result;
-}
-
/*
* Return a column list clause for the given relation.
*
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 2bfa2d9742..a60cf95733 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -605,7 +605,6 @@ extern void parseOidArray(const char *str, Oid *array, int arraysize);
extern void sortDumpableObjects(DumpableObject **objs, int numObjs,
DumpId preBoundaryId, DumpId postBoundaryId);
extern void sortDumpableObjectsByTypeName(DumpableObject **objs, int numObjs);
-extern void sortDumpableObjectsByTypeOid(DumpableObject **objs, int numObjs);
extern void sortDataAndIndexObjectsBySize(DumpableObject **objs, int numObjs);
/*
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index d87f08d356..195b84a0d4 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -23,63 +23,7 @@
static const char *modulename = gettext_noop("sorter");
/*
- * Sort priority for object types when dumping a pre-7.3 database.
- * Objects are sorted by priority levels, and within an equal priority level
- * by OID. (This is a relatively crude hack to provide semi-reasonable
- * behavior for old databases without full dependency info.) Note: collations,
- * extensions, text search, foreign-data, materialized view, event trigger,
- * policies, transforms, access methods and default ACL objects can't really
- * happen here, so the rather bogus priorities for them don't matter.
- *
- * NOTE: object-type priorities must match the section assignments made in
- * pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY,
- * POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects
- * must sort between them.
- */
-static const int oldObjectTypePriority[] =
-{
- 1, /* DO_NAMESPACE */
- 1, /* DO_EXTENSION */
- 2, /* DO_TYPE */
- 2, /* DO_SHELL_TYPE */
- 2, /* DO_FUNC */
- 3, /* DO_AGG */
- 3, /* DO_OPERATOR */
- 3, /* DO_ACCESS_METHOD */
- 4, /* DO_OPCLASS */
- 4, /* DO_OPFAMILY */
- 4, /* DO_COLLATION */
- 5, /* DO_CONVERSION */
- 6, /* DO_TABLE */
- 8, /* DO_ATTRDEF */
- 15, /* DO_INDEX */
- 16, /* DO_RULE */
- 17, /* DO_TRIGGER */
- 14, /* DO_CONSTRAINT */
- 18, /* DO_FK_CONSTRAINT */
- 2, /* DO_PROCLANG */
- 2, /* DO_CAST */
- 11, /* DO_TABLE_DATA */
- 7, /* DO_DUMMY_TYPE */
- 4, /* DO_TSPARSER */
- 4, /* DO_TSDICT */
- 4, /* DO_TSTEMPLATE */
- 4, /* DO_TSCONFIG */
- 4, /* DO_FDW */
- 4, /* DO_FOREIGN_SERVER */
- 19, /* DO_DEFAULT_ACL */
- 4, /* DO_TRANSFORM */
- 9, /* DO_BLOB */
- 12, /* DO_BLOB_DATA */
- 10, /* DO_PRE_DATA_BOUNDARY */
- 13, /* DO_POST_DATA_BOUNDARY */
- 20, /* DO_EVENT_TRIGGER */
- 15, /* DO_REFRESH_MATVIEW */
- 21 /* DO_POLICY */
-};
-
-/*
- * Sort priority for object types when dumping newer databases.
+ * Sort priority for database object types.
* Objects are sorted by type, and within a type by name.
*
* NOTE: object-type priorities must match the section assignments made in
@@ -87,7 +31,7 @@ static const int oldObjectTypePriority[] =
* POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects
* must sort between them.
*/
-static const int newObjectTypePriority[] =
+static const int dbObjectTypePriority[] =
{
1, /* DO_NAMESPACE */
4, /* DO_EXTENSION */
@@ -134,7 +78,6 @@ static DumpId postDataBoundId;
static int DOTypeNameCompare(const void *p1, const void *p2);
-static int DOTypeOidCompare(const void *p1, const void *p2);
static bool TopoSort(DumpableObject **objs,
int numObjs,
DumpableObject **ordering,
@@ -266,8 +209,8 @@ DOTypeNameCompare(const void *p1, const void *p2)
int cmpval;
/* Sort by type */
- cmpval = newObjectTypePriority[obj1->objType] -
- newObjectTypePriority[obj2->objType];
+ cmpval = dbObjectTypePriority[obj1->objType] -
+ dbObjectTypePriority[obj2->objType];
if (cmpval != 0)
return cmpval;
@@ -345,37 +288,6 @@ DOTypeNameCompare(const void *p1, const void *p2)
}
-/*
- * Sort the given objects into a type/OID-based ordering
- *
- * This is used with pre-7.3 source databases as a crude substitute for the
- * lack of dependency information.
- */
-void
-sortDumpableObjectsByTypeOid(DumpableObject **objs, int numObjs)
-{
- if (numObjs > 1)
- qsort((void *) objs, numObjs, sizeof(DumpableObject *),
- DOTypeOidCompare);
-}
-
-static int
-DOTypeOidCompare(const void *p1, const void *p2)
-{
- DumpableObject *obj1 = *(DumpableObject *const *) p1;
- DumpableObject *obj2 = *(DumpableObject *const *) p2;
- int cmpval;
-
- cmpval = oldObjectTypePriority[obj1->objType] -
- oldObjectTypePriority[obj2->objType];
-
- if (cmpval != 0)
- return cmpval;
-
- return oidcmp(obj1->catId.oid, obj2->catId.oid);
-}
-
-
/*
* Sort the given objects into a safe dump order using dependency
* information (to the extent we have it available).
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index b5efb46019..82157e5620 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -480,10 +480,7 @@ main(int argc, char *argv[])
dropDBs(conn);
if (!roles_only && !no_tablespaces)
- {
- if (server_version >= 80000)
- dropTablespaces(conn);
- }
+ dropTablespaces(conn);
if (!tablespaces_only)
dropRoles(conn);
@@ -505,12 +502,9 @@ main(int argc, char *argv[])
dumpGroups(conn);
}
+ /* Dump tablespaces */
if (!roles_only && !no_tablespaces)
- {
- /* Dump tablespaces */
- if (server_version >= 80000)
- dumpTablespaces(conn);
- }
+ dumpTablespaces(conn);
/* Dump CREATE DATABASE commands */
if (binary_upgrade || (!globals_only && !roles_only && !tablespaces_only))
@@ -886,9 +880,8 @@ dumpRoles(PGconn *conn)
* We do it this way because config settings for roles could mention the
* names of other roles.
*/
- if (server_version >= 70300)
- for (i = 0; i < PQntuples(res); i++)
- dumpUserConfig(conn, PQgetvalue(res, i, i_rolname));
+ for (i = 0; i < PQntuples(res); i++)
+ dumpUserConfig(conn, PQgetvalue(res, i, i_rolname));
PQclear(res);
@@ -1204,16 +1197,10 @@ dropDBs(PGconn *conn)
PGresult *res;
int i;
- if (server_version >= 70100)
- res = executeQuery(conn,
- "SELECT datname "
- "FROM pg_database d "
- "WHERE datallowconn ORDER BY 1");
- else
- res = executeQuery(conn,
- "SELECT datname "
- "FROM pg_database d "
- "ORDER BY 1");
+ res = executeQuery(conn,
+ "SELECT datname "
+ "FROM pg_database d "
+ "WHERE datallowconn ORDER BY 1");
if (PQntuples(res) > 0)
fprintf(OPF, "--\n-- Drop databases\n--\n\n");
@@ -1269,12 +1256,10 @@ dumpCreateDB(PGconn *conn)
* We will dump encoding and locale specifications in the CREATE DATABASE
* commands for just those databases with values different from defaults.
*
- * We consider template0's encoding and locale (or, pre-7.1, template1's)
- * to define the installation default. Pre-8.4 installations do not have
- * per-database locale settings; for them, every database must necessarily
- * be using the installation default, so there's no need to do anything
- * (which is good, since in very old versions there is no good way to find
- * out what the installation locale is anyway...)
+ * We consider template0's encoding and locale to define the installation
+ * default. Pre-8.4 installations do not have per-database locale
+ * settings; for them, every database must necessarily be using the
+ * installation default, so there's no need to do anything.
*/
if (server_version >= 80400)
res = executeQuery(conn,
@@ -1282,18 +1267,12 @@ dumpCreateDB(PGconn *conn)
"datcollate, datctype "
"FROM pg_database "
"WHERE datname = 'template0'");
- else if (server_version >= 70100)
- res = executeQuery(conn,
- "SELECT pg_encoding_to_char(encoding), "
- "null::text AS datcollate, null::text AS datctype "
- "FROM pg_database "
- "WHERE datname = 'template0'");
else
res = executeQuery(conn,
"SELECT pg_encoding_to_char(encoding), "
"null::text AS datcollate, null::text AS datctype "
"FROM pg_database "
- "WHERE datname = 'template1'");
+ "WHERE datname = 'template0'");
/* If for some reason the template DB isn't there, treat as unknown */
if (PQntuples(res) > 0)
@@ -1371,7 +1350,7 @@ dumpCreateDB(PGconn *conn)
"(SELECT spcname FROM pg_tablespace t WHERE t.oid = d.dattablespace) AS dattablespace "
"FROM pg_database d LEFT JOIN pg_authid u ON (datdba = u.oid) "
"WHERE datallowconn ORDER BY 1");
- else if (server_version >= 80000)
+ else
res = executeQuery(conn,
"SELECT datname, "
"coalesce(usename, (select usename from pg_shadow where usesysid=(select datdba from pg_database where datname='template0'))), "
@@ -1382,47 +1361,6 @@ dumpCreateDB(PGconn *conn)
"(SELECT spcname FROM pg_tablespace t WHERE t.oid = d.dattablespace) AS dattablespace "
"FROM pg_database d LEFT JOIN pg_shadow u ON (datdba = usesysid) "
"WHERE datallowconn ORDER BY 1");
- else if (server_version >= 70300)
- res = executeQuery(conn,
- "SELECT datname, "
- "coalesce(usename, (select usename from pg_shadow where usesysid=(select datdba from pg_database where datname='template0'))), "
- "pg_encoding_to_char(d.encoding), "
- "null::text AS datcollate, null::text AS datctype, datfrozenxid, 0 AS datminmxid, "
- "datistemplate, datacl, '' as rdatacl, "
- "-1 as datconnlimit, "
- "'pg_default' AS dattablespace "
- "FROM pg_database d LEFT JOIN pg_shadow u ON (datdba = usesysid) "
- "WHERE datallowconn ORDER BY 1");
- else if (server_version >= 70100)
- res = executeQuery(conn,
- "SELECT datname, "
- "coalesce("
- "(select usename from pg_shadow where usesysid=datdba), "
- "(select usename from pg_shadow where usesysid=(select datdba from pg_database where datname='template0'))), "
- "pg_encoding_to_char(d.encoding), "
- "null::text AS datcollate, null::text AS datctype, 0 AS datfrozenxid, 0 AS datminmxid, "
- "datistemplate, '' as datacl, '' as rdatacl, "
- "-1 as datconnlimit, "
- "'pg_default' AS dattablespace "
- "FROM pg_database d "
- "WHERE datallowconn ORDER BY 1");
- else
- {
- /*
- * Note: 7.0 fails to cope with sub-select in COALESCE, so just deal
- * with getting a NULL by not printing any OWNER clause.
- */
- res = executeQuery(conn,
- "SELECT datname, "
- "(select usename from pg_shadow where usesysid=datdba), "
- "pg_encoding_to_char(d.encoding), "
- "null::text AS datcollate, null::text AS datctype, 0 AS datfrozenxid, 0 AS datminmxid, "
- "'f' as datistemplate, "
- "'' as datacl, '' as rdatacl, -1 as datconnlimit, "
- "'pg_default' AS dattablespace "
- "FROM pg_database d "
- "ORDER BY 1");
- }
for (i = 0; i < PQntuples(res); i++)
{
@@ -1541,8 +1479,7 @@ dumpCreateDB(PGconn *conn)
fprintf(OPF, "%s", buf->data);
- if (server_version >= 70300)
- dumpDatabaseConfig(conn, dbname);
+ dumpDatabaseConfig(conn, dbname);
free(fdbname);
}
@@ -1738,10 +1675,7 @@ dumpDatabases(PGconn *conn)
PGresult *res;
int i;
- if (server_version >= 70100)
- res = executeQuery(conn, "SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1");
- else
- res = executeQuery(conn, "SELECT datname FROM pg_database ORDER BY 1");
+ res = executeQuery(conn, "SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1");
for (i = 0; i < PQntuples(res); i++)
{
@@ -2062,11 +1996,11 @@ connectDatabase(const char *dbname, const char *connection_string,
my_version = PG_VERSION_NUM;
/*
- * We allow the server to be back to 7.0, and up to any minor release of
+ * We allow the server to be back to 8.0, and up to any minor release of
* our own major version. (See also version check in pg_dump.c.)
*/
if (my_version != server_version
- && (server_version < 70000 ||
+ && (server_version < 80000 ||
(server_version / 100) > (my_version / 100)))
{
fprintf(stderr, _("server version: %s; %s version: %s\n"),
@@ -2076,11 +2010,9 @@ connectDatabase(const char *dbname, const char *connection_string,
}
/*
- * On 7.3 and later, make sure we are not fooled by non-system schemas in
- * the search path.
+ * Make sure we are not fooled by non-system schemas in the search path.
*/
- if (server_version >= 70300)
- executeCommand(conn, "SET search_path = pg_catalog");
+ executeCommand(conn, "SET search_path = pg_catalog");
return conn;
}
--
cgit v1.2.3
From 248776ea06c240ae4605e77369d66bcd7ae4f9e3 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Wed, 12 Oct 2016 17:01:19 -0700
Subject: Remove spurious word.
Tatsuo Ishii
---
doc/src/sgml/parallel.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index c80d42dbef..1e71529eeb 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -241,7 +241,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
than normal but would produce incorrect results. Instead, the parallel
portion of the plan must be what is known internally to the query
optimizer as a partial plan; that is, it must be constructed
- so that each process will which executes the plan will generate only a
+ so that each process which executes the plan will generate only a
subset of the output rows in such a way that each required output row
is guaranteed to be generated by exactly one of the cooperating processes.
--
cgit v1.2.3
From b1ee762a6138df073d4b2b80c235dd9025a8532c Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii
Date: Fri, 14 Oct 2016 07:45:25 +0900
Subject: Fix typo.
Confirmed by Tom Lane.
---
doc/src/sgml/parallel.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index 1e71529eeb..94170b770b 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -189,7 +189,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
Even when a parallel query plan is generated for a particular query, there
are several circumstances under which it will be impossible to execute
that plan in parallel at execution time. If this occurs, the leader
- will execute the portion of the plan between below the Gather
+ will execute the portion of the plan below the Gather
node entirely by itself, almost as if the Gather node were
not present. This will happen if any of the following conditions are met:
--
cgit v1.2.3
From 13d3180fd14c624bbb274e200e98ddb50e260216 Mon Sep 17 00:00:00 2001
From: Tatsuo Ishii
Date: Fri, 14 Oct 2016 09:03:25 +0900
Subject: Fix typo.
Confirmed by Michael Paquier.
---
doc/src/sgml/parallel.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index 94170b770b..d0b438e889 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -339,7 +339,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
When executing a parallel plan, you can use EXPLAIN (ANALYZE,
- VERBOSE) will display per-worker statistics for each plan node.
+ VERBOSE) to display per-worker statistics for each plan node.
This may be useful in determining whether the work is being evenly
distributed between all plan nodes and more generally in understanding the
performance characteristics of the plan.
--
cgit v1.2.3
From 7d3235ba42f8d5fc70c58e242702cc5e2e3549a6 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 17 Oct 2016 16:31:13 -0400
Subject: By default, set log_line_prefix = '%m [%p] '.
This value might not be to everyone's taste; in particular, some
people might prefer %t to %m, and others may want %u, %d, or other
fields. However, it's a vast improvement on the old default of ''.
Christoph Berg
---
doc/src/sgml/config.sgml | 14 +++++++++++++-
src/backend/utils/misc/guc.c | 2 +-
src/backend/utils/misc/postgresql.conf.sample | 2 +-
3 files changed, 15 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index e826c19698..99ff9f5ab5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5004,7 +5004,8 @@ local0.* /var/log/postgresql
value will pad on the left. Padding can be useful to aid human
readability in log files.
This parameter can only be set in the postgresql.conf
- file or on the server command line. The default is an empty string.
+ file or on the server command line. The default is
+ '%m [%p] ' which logs a time stamp and the process ID.
@@ -5142,6 +5143,17 @@ FROM pg_stat_activity;
include those escapes if you are logging to syslog.
+
+
+
+ The %q escape is useful when including information that is
+ only available in session (backend) context like user or database
+ name. For example:
+
+log_line_prefix = '%m [%p] %q%u@%d/%a '
+
+
+
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 622279b058..65660c1bf7 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3014,7 +3014,7 @@ static struct config_string ConfigureNamesString[] =
gettext_noop("If blank, no prefix is used.")
},
&Log_line_prefix,
- "",
+ "%m [%p] ",
NULL, NULL, NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 05b1373594..159ada3bc6 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -430,7 +430,7 @@
#log_duration = off
#log_error_verbosity = default # terse, default, or verbose messages
#log_hostname = off
-#log_line_prefix = '' # special values:
+#log_line_prefix = '%m [%p] ' # special values:
# %a = application name
# %u = user name
# %d = database name
--
cgit v1.2.3
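With this default, every server log line is prefixed by a millisecond-resolution time stamp and the backend process ID. A sketch of the resulting output (time stamp and PID are hypothetical):
2016-10-17 16:31:13.496 EDT [12345] LOG:  database system is ready to accept connections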
From 0be22457d730da8971f761b9c948f742a12b50b2 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 19 Oct 2016 12:00:00 -0400
Subject: pg_ctl: Add long options for -w and -W
From: Vik Fearing
---
doc/src/sgml/ref/pg_ctl-ref.sgml | 2 ++
src/bin/pg_ctl/pg_ctl.c | 6 ++++--
2 files changed, 6 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index a00c355f4a..11444e85a8 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -383,6 +383,7 @@ PostgreSQL documentation
-w
+ --wait
Wait for an operation to complete. This is supported for the
@@ -415,6 +416,7 @@ PostgreSQL documentation
-W
+ --no-wait
Do not wait for an operation to complete. This is the opposite of the
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index ab10d2f25c..7d97232613 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -1959,8 +1959,8 @@ do_help(void)
printf(_(" -s, --silent only print errors, no informational messages\n"));
printf(_(" -t, --timeout=SECS seconds to wait when using -w option\n"));
printf(_(" -V, --version output version information, then exit\n"));
- printf(_(" -w wait until operation completes\n"));
- printf(_(" -W do not wait until operation completes\n"));
+ printf(_(" -w, --wait wait until operation completes\n"));
+ printf(_(" -W, --no-wait do not wait until operation completes\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("(The default is to wait for shutdown, but not for start or restart.)\n\n"));
printf(_("If the -D option is omitted, the environment variable PGDATA is used.\n"));
@@ -2174,6 +2174,8 @@ main(int argc, char **argv)
{"silent", no_argument, NULL, 's'},
{"timeout", required_argument, NULL, 't'},
{"core-files", no_argument, NULL, 'c'},
+ {"wait", no_argument, NULL, 'w'},
+ {"no-wait", no_argument, NULL, 'W'},
{NULL, 0, NULL, 0}
};
--
cgit v1.2.3
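A usage sketch with the new long spellings (data directory path is hypothetical):
$ pg_ctl --wait --pgdata=/usr/local/pgsql/data start
$ pg_ctl --no-wait --pgdata=/usr/local/pgsql/data restart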
From c709c6074083a8cc5f1ba431c741ff04e3a8a906 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 19 Oct 2016 12:00:00 -0400
Subject: doc: Consistently use = sign in long options synopses
This was already the predominant form in man pages and help output.
---
doc/src/sgml/ref/clusterdb.sgml | 2 +-
doc/src/sgml/ref/pg_ctl-ref.sgml | 6 +++---
doc/src/sgml/ref/pgupgrade.sgml | 2 +-
doc/src/sgml/ref/reindexdb.sgml | 2 +-
doc/src/sgml/ref/vacuumdb.sgml | 2 +-
5 files changed, 7 insertions(+), 7 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/clusterdb.sgml b/doc/src/sgml/ref/clusterdb.sgml
index c13d74853e..67582fd6e6 100644
--- a/doc/src/sgml/ref/clusterdb.sgml
+++ b/doc/src/sgml/ref/clusterdb.sgml
@@ -316,7 +316,7 @@ PostgreSQL documentation
foo in a database named
xyzzy:
-$ clusterdb --table foo xyzzy
+$ clusterdb --table=foo xyzzy
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index 11444e85a8..ea0a6353d8 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -263,7 +263,7 @@ PostgreSQL documentation
-D datadir
- --pgdata datadir
+ --pgdata=datadir
Specifies the file system location of the database configuration files. If
@@ -275,7 +275,7 @@ PostgreSQL documentation
-l filename
- --log filename
+ --log=filename
Append the server log output to
@@ -288,7 +288,7 @@ PostgreSQL documentation
-m mode
- --mode mode
+ --mode=mode
Specifies the shutdown mode. mode
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 96851933cc..d46a730f66 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -558,7 +558,7 @@ rsync --archive --delete --hard-links --size-only old_pgdata new_pgdata remote_d
run using:
-psql --username postgres --file script.sql postgres
+psql --username=postgres --file=script.sql postgres
The scripts can be run in any order and can be deleted once they have
diff --git a/doc/src/sgml/ref/reindexdb.sgml b/doc/src/sgml/ref/reindexdb.sgml
index 713efc099b..36df949c95 100644
--- a/doc/src/sgml/ref/reindexdb.sgml
+++ b/doc/src/sgml/ref/reindexdb.sgml
@@ -396,7 +396,7 @@ PostgreSQL documentation
To reindex the table foo and the index
bar in a database named abcd:
-$ reindexdb --table foo --index bar abcd
+$ reindexdb --table=foo --index=bar abcd
diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml
index 92b8984b7a..4f6fa0d708 100644
--- a/doc/src/sgml/ref/vacuumdb.sgml
+++ b/doc/src/sgml/ref/vacuumdb.sgml
@@ -430,7 +430,7 @@ PostgreSQL documentation
xyzzy, and analyze a single column
bar of the table for the optimizer:
-$ vacuumdb --analyze --verbose --table 'foo(bar)' xyzzy
+$ vacuumdb --analyze --verbose --table='foo(bar)' xyzzy
--
cgit v1.2.3
From caf936b09fc7c74844575332b07c667a178cb078 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 19 Oct 2016 12:00:00 -0400
Subject: pg_ctl: Add long option for -o
Now all normally used options are covered by long options as well.
---
doc/src/sgml/ref/pg_ctl-ref.sgml | 2 ++
src/bin/pg_ctl/pg_ctl.c | 3 ++-
2 files changed, 4 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index ea0a6353d8..5fb6898699 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -301,6 +301,7 @@ PostgreSQL documentation
-o options
+ --options=options
Specifies options to be passed directly to the
@@ -316,6 +317,7 @@ PostgreSQL documentation
-o initdb-options
+ --options=initdb-options
Specifies options to be passed directly to the
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index 7d97232613..4b476022c0 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -1972,7 +1972,7 @@ do_help(void)
printf(_(" -c, --core-files not applicable on this platform\n"));
#endif
printf(_(" -l, --log=FILENAME write (or append) server log to FILENAME\n"));
- printf(_(" -o OPTIONS command line options to pass to postgres\n"
+ printf(_(" -o, --options=OPTIONS command line options to pass to postgres\n"
" (PostgreSQL server executable) or initdb\n"));
printf(_(" -p PATH-TO-POSTGRES normally not necessary\n"));
printf(_("\nOptions for stop or restart:\n"));
@@ -2171,6 +2171,7 @@ main(int argc, char **argv)
{"log", required_argument, NULL, 'l'},
{"mode", required_argument, NULL, 'm'},
{"pgdata", required_argument, NULL, 'D'},
+ {"options", required_argument, NULL, 'o'},
{"silent", no_argument, NULL, 's'},
{"timeout", required_argument, NULL, 't'},
{"core-files", no_argument, NULL, 'c'},
--
cgit v1.2.3
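For example, a nonstandard port (port number hypothetical) can now be passed through to postgres by name:
$ pg_ctl --options='-p 5433' --pgdata=/usr/local/pgsql/data start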
From 5d58c07a441414ae29a8e315d2f9868d3d8e20be Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 19 Oct 2016 12:00:00 -0400
Subject: initdb pg_basebackup: Rename --noxxx options to --no-xxx
--noclean and --nosync were the only options spelled without a hyphen,
so change this for consistency with other options. The options in
pg_basebackup have not been in a release, so we just rename them. For
initdb, we retain the old variants.
Vik Fearing and me
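A sketch of the renamed spellings (target directories are hypothetical):
$ initdb --no-clean --no-sync -D /tmp/testdb
$ pg_basebackup --no-clean --no-sync -D /tmp/backup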
---
doc/src/sgml/ref/initdb.sgml | 4 ++--
doc/src/sgml/ref/pg_basebackup.sgml | 4 ++--
src/bin/initdb/initdb.c | 12 +++++++-----
src/bin/pg_basebackup/pg_basebackup.c | 8 ++++----
src/bin/pg_basebackup/t/010_pg_basebackup.pl | 2 +-
src/test/perl/PostgresNode.pm | 2 +-
src/test/regress/pg_regress.c | 2 +-
7 files changed, 18 insertions(+), 16 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 4e339ecce8..31f081ae7a 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -235,7 +235,7 @@ PostgreSQL documentation
-N
-
--nosync
+
--no-sync
By default, initdb will wait for all files to be
@@ -355,7 +355,7 @@ PostgreSQL documentation
-n
-
--noclean
+
--no-clean
By default, when initdb
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 55e913f70d..7cb690dded 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -400,7 +400,7 @@ PostgreSQL documentation
-n
-
--noclean
+
--no-clean
By default, when pg_basebackup aborts with an
@@ -440,7 +440,7 @@ PostgreSQL documentation
-N
-
--nosync
+
--no-sync
By default, pg_basebackup will wait for all files
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index e52e67df61..9e23f64130 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2402,8 +2402,8 @@ usage(const char *progname)
printf(_(" -d, --debug generate lots of debugging output\n"));
printf(_(" -k, --data-checksums use data page checksums\n"));
printf(_(" -L DIRECTORY where to find the input files\n"));
- printf(_(" -n, --noclean do not clean up after errors\n"));
- printf(_(" -N, --nosync do not wait for changes to be written safely to disk\n"));
+ printf(_(" -n, --no-clean do not clean up after errors\n"));
+ printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" -s, --show show internal settings\n"));
printf(_(" -S, --sync-only only sync data directory\n"));
printf(_("\nOther options:\n"));
@@ -3078,8 +3078,10 @@ main(int argc, char *argv[])
{"version", no_argument, NULL, 'V'},
{"debug", no_argument, NULL, 'd'},
{"show", no_argument, NULL, 's'},
- {"noclean", no_argument, NULL, 'n'},
- {"nosync", no_argument, NULL, 'N'},
+ {"noclean", no_argument, NULL, 'n'}, /* for backwards compatibility */
+ {"no-clean", no_argument, NULL, 'n'},
+ {"nosync", no_argument, NULL, 'N'}, /* for backwards compatibility */
+ {"no-sync", no_argument, NULL, 'N'},
{"sync-only", no_argument, NULL, 'S'},
{"xlogdir", required_argument, NULL, 'X'},
{"data-checksums", no_argument, NULL, 'k'},
@@ -3165,7 +3167,7 @@ main(int argc, char *argv[])
break;
case 'n':
noclean = true;
- printf(_("Running in noclean mode. Mistakes will not be cleaned up.\n"));
+ printf(_("Running in no-clean mode. Mistakes will not be cleaned up.\n"));
break;
case 'N':
do_sync = false;
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 0f5d9d6a87..76e8f449fe 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -329,8 +329,8 @@ usage(void)
printf(_(" -c, --checkpoint=fast|spread\n"
" set fast or spread checkpointing\n"));
printf(_(" -l, --label=LABEL set backup label\n"));
- printf(_(" -n, --noclean do not clean up after errors\n"));
- printf(_(" -N, --nosync do not wait for changes to be written safely to disk\n"));
+ printf(_(" -n, --no-clean do not clean up after errors\n"));
+ printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
printf(_(" -P, --progress show progress information\n"));
printf(_(" -v, --verbose output verbose messages\n"));
printf(_(" -V, --version output version information, then exit\n"));
@@ -2006,8 +2006,8 @@ main(int argc, char **argv)
{"gzip", no_argument, NULL, 'z'},
{"compress", required_argument, NULL, 'Z'},
{"label", required_argument, NULL, 'l'},
- {"noclean", no_argument, NULL, 'n'},
- {"nosync", no_argument, NULL, 'N'},
+ {"no-clean", no_argument, NULL, 'n'},
+ {"no-sync", no_argument, NULL, 'N'},
{"dbname", required_argument, NULL, 'd'},
{"host", required_argument, NULL, 'h'},
{"port", required_argument, NULL, 'p'},
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index a52bd4e124..fcedfed2b2 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -44,7 +44,7 @@ ok(! -d "$tempdir/backup", 'backup directory was cleaned up');
$node->command_fails(
[ 'pg_basebackup', '-D', "$tempdir/backup", '-n' ],
- 'failing run with noclean option');
+ 'failing run with no-clean option');
ok(-d "$tempdir/backup", 'backup directory was created and left behind');
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 535d6c0e7c..6e5a75a050 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -484,7 +484,7 @@ sub backup
print "# Taking pg_basebackup $backup_name from node \"$name\"\n";
TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-p', $port,
- '-x', '--nosync');
+ '-x', '--no-sync');
print "# Backup finished\n";
}
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index 762adb8443..f2dedbbc8a 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -2239,7 +2239,7 @@ regression_main(int argc, char *argv[], init_function ifunc, test_function tfunc
/* initdb */
header(_("initializing database system"));
snprintf(buf, sizeof(buf),
- "\"%s%sinitdb\" -D \"%s/data\" --noclean --nosync%s%s > \"%s/log/initdb.log\" 2>&1",
+ "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1",
bindir ? bindir : "",
bindir ? "/" : "",
temp_instance,
--
cgit v1.2.3
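Retaining the old initdb spellings costs only one extra table row per option: two long-option entries may share the same val code, so the deprecated name keeps working without any new branch in the option-handling switch. A compressed sketch of that pattern (illustrative, not the initdb source):

#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
	static struct option long_options[] = {
		{"noclean", no_argument, NULL, 'n'},	/* deprecated alias, kept working */
		{"no-clean", no_argument, NULL, 'n'},	/* documented spelling */
		{NULL, 0, NULL, 0}
	};
	bool		noclean = false;
	int			c;

	while ((c = getopt_long(argc, argv, "n", long_options, NULL)) != -1)
	{
		if (c == 'n')
			noclean = true;		/* one flag, three accepted spellings */
	}
	printf("no-clean mode: %s\n", noclean ? "on" : "off");
	return 0;
}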
From 9ffe4a8b4cbb713bf8137f8414f02d97b6b2eb08 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 19 Oct 2016 12:00:00 -0400
Subject: Make getrusage() output a little more readable
Reviewed-by: Robert Haas
Reviewed-by: Peter Geoghegan
---
doc/src/sgml/ref/vacuum.sgml | 12 ++++++------
src/backend/tcop/postgres.c | 10 +++++-----
src/backend/utils/misc/pg_rusage.c | 6 +++---
3 files changed, 14 insertions(+), 14 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index dee1c5bad3..f18180a2fa 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -253,27 +253,27 @@ INFO: vacuuming "public.onek"
INFO: index "onek_unique1" now contains 1000 tuples in 14 pages
DETAIL: 3000 index tuples were removed.
0 index pages have been deleted, 0 are currently reusable.
-CPU 0.01s/0.08u sec elapsed 0.18 sec.
+CPU: user: 0.08 s, system: 0.01 s, elapsed: 0.18 s.
INFO: index "onek_unique2" now contains 1000 tuples in 16 pages
DETAIL: 3000 index tuples were removed.
0 index pages have been deleted, 0 are currently reusable.
-CPU 0.00s/0.07u sec elapsed 0.23 sec.
+CPU: user: 0.07 s, system: 0.00 s, elapsed: 0.23 s.
INFO: index "onek_hundred" now contains 1000 tuples in 13 pages
DETAIL: 3000 index tuples were removed.
0 index pages have been deleted, 0 are currently reusable.
-CPU 0.01s/0.08u sec elapsed 0.17 sec.
+CPU: user: 0.08 s, system: 0.01 s, elapsed: 0.17 s.
INFO: index "onek_stringu1" now contains 1000 tuples in 48 pages
DETAIL: 3000 index tuples were removed.
0 index pages have been deleted, 0 are currently reusable.
-CPU 0.01s/0.09u sec elapsed 0.59 sec.
+CPU: user: 0.09 s, system: 0.01 s, elapsed: 0.59 s.
INFO: "onek": removed 3000 tuples in 108 pages
-DETAIL: CPU 0.01s/0.06u sec elapsed 0.07 sec.
+DETAIL: CPU: user: 0.06 s, system: 0.01 s, elapsed: 0.07 s.
INFO: "onek": found 3000 removable, 1000 nonremovable tuples in 143 pages
DETAIL: 0 dead tuples cannot be removed yet.
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins.
0 pages are entirely empty.
-CPU 0.07s/0.39u sec elapsed 1.56 sec.
+CPU: user: 0.39 s, system: 0.07 s, elapsed: 1.56 s.
INFO: analyzing "public.onek"
INFO: "onek": 36 pages, 1000 rows sampled, 1000 estimated total rows
VACUUM
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2347f1bcdc..599874e743 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4422,15 +4422,15 @@ ShowUsage(const char *title)
appendStringInfoString(&str, "! system usage stats:\n");
appendStringInfo(&str,
- "!\t%ld.%06ld elapsed %ld.%06ld user %ld.%06ld system sec\n",
- (long) (elapse_t.tv_sec - Save_t.tv_sec),
- (long) (elapse_t.tv_usec - Save_t.tv_usec),
+ "!\t%ld.%06ld s user, %ld.%06ld s system, %ld.%06ld s elapsed\n",
(long) (r.ru_utime.tv_sec - Save_r.ru_utime.tv_sec),
(long) (r.ru_utime.tv_usec - Save_r.ru_utime.tv_usec),
(long) (r.ru_stime.tv_sec - Save_r.ru_stime.tv_sec),
- (long) (r.ru_stime.tv_usec - Save_r.ru_stime.tv_usec));
+ (long) (r.ru_stime.tv_usec - Save_r.ru_stime.tv_usec),
+ (long) (elapse_t.tv_sec - Save_t.tv_sec),
+ (long) (elapse_t.tv_usec - Save_t.tv_usec));
appendStringInfo(&str,
- "!\t[%ld.%06ld user %ld.%06ld sys total]\n",
+ "!\t[%ld.%06ld s user, %ld.%06ld s system total]\n",
(long) user.tv_sec,
(long) user.tv_usec,
(long) sys.tv_sec,
diff --git a/src/backend/utils/misc/pg_rusage.c b/src/backend/utils/misc/pg_rusage.c
index 8781a383c0..6602663e42 100644
--- a/src/backend/utils/misc/pg_rusage.c
+++ b/src/backend/utils/misc/pg_rusage.c
@@ -61,11 +61,11 @@ pg_rusage_show(const PGRUsage *ru0)
}
snprintf(result, sizeof(result),
- "CPU %d.%02ds/%d.%02du sec elapsed %d.%02d sec",
- (int) (ru1.ru.ru_stime.tv_sec - ru0->ru.ru_stime.tv_sec),
- (int) (ru1.ru.ru_stime.tv_usec - ru0->ru.ru_stime.tv_usec) / 10000,
+ "CPU: user: %d.%02d s, system: %d.%02d s, elapsed: %d.%02d s",
(int) (ru1.ru.ru_utime.tv_sec - ru0->ru.ru_utime.tv_sec),
(int) (ru1.ru.ru_utime.tv_usec - ru0->ru.ru_utime.tv_usec) / 10000,
+ (int) (ru1.ru.ru_stime.tv_sec - ru0->ru.ru_stime.tv_sec),
+ (int) (ru1.ru.ru_stime.tv_usec - ru0->ru.ru_stime.tv_usec) / 10000,
(int) (ru1.tv.tv_sec - ru0->tv.tv_sec),
(int) (ru1.tv.tv_usec - ru0->tv.tv_usec) / 10000);
--
cgit v1.2.3
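The reordered format can be reproduced with a small stand-alone program; this is a hedged sketch built on POSIX getrusage() and gettimeofday() rather than the pg_rusage.c code itself. The borrow step mirrors the microsecond adjustment the real code performs before printing:

#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

/* a -= b, borrowing from seconds so tv_usec stays non-negative */
static void
tv_diff(struct timeval *a, const struct timeval *b)
{
	a->tv_sec -= b->tv_sec;
	a->tv_usec -= b->tv_usec;
	if (a->tv_usec < 0)
	{
		a->tv_sec--;
		a->tv_usec += 1000000;
	}
}

int
main(void)
{
	struct rusage ru0, ru1;
	struct timeval tv0, tv1;
	volatile long sink = 0;

	getrusage(RUSAGE_SELF, &ru0);
	gettimeofday(&tv0, NULL);

	for (long i = 0; i < 100000000; i++)	/* burn some user CPU */
		sink += i;

	getrusage(RUSAGE_SELF, &ru1);
	gettimeofday(&tv1, NULL);

	tv_diff(&ru1.ru_utime, &ru0.ru_utime);
	tv_diff(&ru1.ru_stime, &ru0.ru_stime);
	tv_diff(&tv1, &tv0);

	/* user first, then system, then elapsed, each with an explicit unit */
	printf("CPU: user: %d.%02d s, system: %d.%02d s, elapsed: %d.%02d s\n",
		   (int) ru1.ru_utime.tv_sec, (int) (ru1.ru_utime.tv_usec / 10000),
		   (int) ru1.ru_stime.tv_sec, (int) (ru1.ru_stime.tv_usec / 10000),
		   (int) tv1.tv_sec, (int) (tv1.tv_usec / 10000));
	return 0;
}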
From f82ec32ac30ae7e3ec7c84067192535b2ff8ec0e Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Thu, 20 Oct 2016 11:24:37 -0400
Subject: Rename "pg_xlog" directory to "pg_wal".
"xlog" is not a particularly clear abbreviation for "write-ahead log",
and it sometimes confuses users into believing that the contents of the
"pg_xlog" directory are not critical data, leading to unpleasant
consequences. So, rename the directory to "pg_wal".
This patch modifies pg_upgrade and pg_basebackup to understand both
the old and new directory layouts; the former is necessary given the
purpose of the tool, while the latter merely avoids an unnecessary
backward-compatibility break.
We may wish to consider renaming other programs, switches, and
functions which still use the old "xlog" naming to also refer to
"wal". However, that's still under discussion, so let's do just this
much for now.
Discussion: CAB7nPqTeC-8+zux8_-4ZD46V7YPwooeFxgndfsq5Rg8ibLVm1A@mail.gmail.com
Michael Paquier
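The dual-layout support reduces to a version check against the connected server: under the new two-part numbering, libpq's PQserverVersion() reports 100000 for version 10, so anything below that still names the directory pg_xlog. A hedged sketch of that check (illustrative; the real constant appears as MINIMUM_VERSION_FOR_PG_WAL in the pg_basebackup.c hunk below):

#include <stdio.h>
#include <libpq-fe.h>

/* pre-10 servers still call the WAL directory pg_xlog */
static const char *
wal_dir_name(PGconn *conn)
{
	return PQserverVersion(conn) < 100000 ? "pg_xlog" : "pg_wal";
}

int
main(void)
{
	PGconn	   *conn = PQconnectdb("");	/* connection settings from the environment */

	if (PQstatus(conn) != CONNECTION_OK)
	{
		fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
		PQfinish(conn);
		return 1;
	}
	printf("WAL directory on this server: %s\n", wal_dir_name(conn));
	PQfinish(conn);
	return 0;
}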
---
doc/src/sgml/backup.sgml | 28 ++++----
doc/src/sgml/config.sgml | 6 +-
doc/src/sgml/func.sgml | 2 +-
doc/src/sgml/high-availability.sgml | 14 ++--
doc/src/sgml/perform.sgml | 2 +-
doc/src/sgml/protocol.sgml | 6 +-
doc/src/sgml/ref/pg_resetxlog.sgml | 10 +--
doc/src/sgml/ref/pg_rewind.sgml | 4 +-
doc/src/sgml/ref/pg_xlogdump.sgml | 2 +-
doc/src/sgml/ref/pgtestfsync.sgml | 4 +-
doc/src/sgml/ref/pgupgrade.sgml | 4 +-
doc/src/sgml/storage.sgml | 2 +-
doc/src/sgml/wal.sgml | 10 +--
src/backend/access/transam/timeline.c | 4 +-
src/backend/access/transam/xlog.c | 100 +++++++++++++--------------
src/backend/access/transam/xlogarchive.c | 2 +-
src/backend/access/transam/xlogfuncs.c | 2 +-
src/backend/replication/README | 2 +-
src/backend/replication/basebackup.c | 20 +++---
src/backend/replication/walreceiver.c | 6 +-
src/backend/replication/walsender.c | 4 +-
src/backend/storage/file/fd.c | 16 ++---
src/bin/initdb/initdb.c | 12 ++--
src/bin/pg_basebackup/pg_basebackup.c | 80 +++++++++++++--------
src/bin/pg_basebackup/t/010_pg_basebackup.pl | 8 +--
src/bin/pg_resetxlog/pg_resetxlog.c | 2 +-
src/bin/pg_rewind/copy_fetch.c | 4 +-
src/bin/pg_rewind/filemap.c | 8 +--
src/bin/pg_rewind/parsexlog.c | 2 +-
src/bin/pg_rewind/t/004_pg_xlog_symlink.pl | 10 +--
src/bin/pg_upgrade/exec.c | 79 +++++++++++++--------
src/bin/pg_xlogdump/pg_xlogdump.c | 2 +-
src/common/file_utils.c | 39 +++++++----
src/include/access/xlog_internal.h | 2 +-
src/include/catalog/catversion.h | 2 +-
src/include/common/file_utils.h | 3 +-
src/include/postmaster/pgarch.h | 2 +-
37 files changed, 279 insertions(+), 226 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 95d0ff3149..6eaed1efbe 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -472,7 +472,7 @@ tar -cf backup.tar /usr/local/pgsql/data
At all times, PostgreSQL> maintains a
- write ahead log> (WAL) in the pg_xlog/>
+ write ahead log> (WAL) in the pg_wal/>
subdirectory of the cluster's data directory. The log records
every change made to the database's data files. This log exists
primarily for crash-safety purposes: if the system crashes, the
@@ -616,7 +616,7 @@ archive_command = 'copy "%p" "C:\\server\\archivedir\\%f"' # Windows
%p> and %f> parameters have been replaced,
the actual command executed might look like this:
-test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_xlog/00000001000000A900000065 /mnt/server/archivedir/00000001000000A900000065
+test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_wal/00000001000000A900000065 /mnt/server/archivedir/00000001000000A900000065
A similar command will be generated for each new file to be archived.
@@ -668,9 +668,9 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_xlog/
fills, nothing further can be archived until the tape is swapped.
You should ensure that any error condition or request to a human operator
is reported appropriately so that the situation can be
- resolved reasonably quickly. The pg_xlog/> directory will
+ resolved reasonably quickly. The pg_wal/> directory will
continue to fill with WAL segment files until the situation is resolved.
- (If the file system containing pg_xlog/> fills up,
+ (If the file system containing pg_wal/> fills up,
PostgreSQL> will do a PANIC shutdown. No committed
transactions will be lost, but the database will remain offline until
you free some space.)
@@ -682,7 +682,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_xlog/
operation continues even if the archiving process falls a little behind.
If archiving falls significantly behind, this will increase the amount of
data that would be lost in the event of a disaster. It will also mean that
- the pg_xlog/> directory will contain large numbers of
+ the pg_wal/> directory will contain large numbers of
not-yet-archived segment files, which could eventually exceed available
disk space. You are advised to monitor the archiving process to ensure that
it is working as you intend.
@@ -743,7 +743,7 @@ test ! -f /mnt/server/archivedir/00000001000000A900000065 && cp pg_xlog/
configuration file reload. If you wish to temporarily stop archiving,
one way to do it is to set archive_command> to the empty
string (''>).
- This will cause WAL files to accumulate in pg_xlog/> until a
+ This will cause WAL files to accumulate in pg_wal/> until a
working archive_command> is re-established.
@@ -1062,10 +1062,10 @@ SELECT pg_stop_backup();
You should, however, omit from the backup the files within the
- cluster's pg_xlog/> subdirectory. This
+ cluster's pg_wal/> subdirectory. This
slight adjustment is worthwhile because it reduces the risk
of mistakes when restoring. This is easy to arrange if
- pg_xlog/> is a symbolic link pointing to someplace outside
+ pg_wal/> is a symbolic link pointing to someplace outside
the cluster directory, which is a common setup anyway for performance
reasons. You might also want to exclude postmaster.pid>
and postmaster.opts>, which record information
@@ -1149,7 +1149,7 @@ SELECT pg_stop_backup();
location in case you need them later. Note that this precaution will
require that you have enough free space on your system to hold two
copies of your existing database. If you do not have enough space,
- you should at least save the contents of the cluster's pg_xlog>
+ you should at least save the contents of the cluster's pg_wal>
subdirectory, as it might contain logs which
were not archived before the system went down.
@@ -1172,9 +1172,9 @@ SELECT pg_stop_backup();
- Remove any files present in pg_xlog/>; these came from the
+ Remove any files present in pg_wal/>; these came from the
file system backup and are therefore probably obsolete rather than current.
- If you didn't archive pg_xlog/> at all, then recreate
+ If you didn't archive pg_wal/> at all, then recreate
it with proper permissions,
being careful to ensure that you re-establish it as a symbolic link
if you had it set up that way before.
@@ -1183,7 +1183,7 @@ SELECT pg_stop_backup();
If you have unarchived WAL segment files that you saved in step 2,
- copy them into pg_xlog/>. (It is best to copy them,
+ copy them into pg_wal/>. (It is best to copy them,
not move them, so you still have the unmodified files if a
problem occurs and you have to start over.)
@@ -1265,9 +1265,9 @@ restore_command = 'cp /mnt/server/archivedir/%f %p'
WAL segments that cannot be found in the archive will be sought in
- pg_xlog/>; this allows use of recent un-archived segments.
+ pg_wal/>; this allows use of recent un-archived segments.
However, segments that are available from the archive will be used in
- preference to files in pg_xlog/>.
+ preference to files in pg_wal/>.
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 99ff9f5ab5..adab2f8378 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2932,7 +2932,7 @@ include_dir 'conf.d'
Specifies the minimum number of past log file segments kept in the
- pg_xlog>
+ pg_wal>
directory, in case a standby server needs to fetch them for streaming
replication. Each segment is normally 16 megabytes. If a standby
server connected to the sending server falls behind by more than
@@ -2946,7 +2946,7 @@ include_dir 'conf.d'
This sets only the minimum number of segments retained in
- pg_xlog>; the system might need to retain more segments
+ pg_wal>; the system might need to retain more segments
for WAL archival or to recover from a checkpoint. If
wal_keep_segments> is zero (the default), the system
doesn't keep any extra segments for standby purposes, so the number
@@ -3322,7 +3322,7 @@ include_dir 'conf.d'
Specify how long the standby server should wait when WAL data is not
available from any sources (streaming replication,
- local pg_xlog> or WAL archive) before retrying to
+ local pg_wal> or WAL archive) before retrying to
retrieve WAL data. This parameter can only be set in the
postgresql.conf> file or on the server command line.
The default value is 5 seconds. Units are milliseconds if not specified.
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index a58835082b..2e64cc430c 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -15327,7 +15327,7 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n);
pg_snapshots | 13
pg_multixact | 14
PG_VERSION | 15
- pg_xlog | 16
+ pg_wal | 16
pg_hba.conf | 17
pg_stat_tmp | 18
pg_subtrans | 19
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 06f49dba5d..5bedaf27a2 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -594,24 +594,24 @@ protocol to make nodes agree on a serializable transactional order.
(see ) or directly from the master
over a TCP connection (streaming replication). The standby server will
also attempt to restore any WAL found in the standby cluster's
- pg_xlog> directory. That typically happens after a server
+ pg_wal> directory. That typically happens after a server
restart, when the standby again replays WAL that was streamed from the
master before the restart, but you can also manually copy files to
- pg_xlog> at any time to have them replayed.
+ pg_wal> at any time to have them replayed.
At startup, the standby begins by restoring all WAL available in the
archive location, calling restore_command>. Once it
reaches the end of WAL available there and restore_command>
- fails, it tries to restore any WAL available in the pg_xlog> directory.
+ fails, it tries to restore any WAL available in the pg_wal> directory.
If that fails, and streaming replication has been configured, the
standby tries to connect to the primary server and start streaming WAL
- from the last valid record found in archive or pg_xlog>. If that fails
+ from the last valid record found in archive or pg_wal>. If that fails
or streaming replication is not configured, or if the connection is
later disconnected, the standby goes back to step 1 and tries to
restore the file from the archive again. This loop of retries from the
- archive, pg_xlog>, and via streaming replication goes on until the server
+ archive, pg_wal>, and via streaming replication goes on until the server
is stopped or failover is triggered by a trigger file.
@@ -619,7 +619,7 @@ protocol to make nodes agree on a serializable transactional order.
Standby mode is exited and the server switches to normal operation
when pg_ctl promote> is run or a trigger file is found
(trigger_file>). Before failover,
- any WAL immediately available in the archive or in pg_xlog> will be
+ any WAL immediately available in the archive or in pg_wal> will be
restored, but no attempt is made to connect to the master.
@@ -895,7 +895,7 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
However, these methods often result in retaining more WAL segments than
required, whereas replication slots retain only the number of segments
known to be needed. An advantage of these methods is that they bound
- the space requirement for pg_xlog>; there is currently no way
+ the space requirement for pg_wal>; there is currently no way
to do this using replication slots.
diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index 7bcbfa7611..8d30fd1384 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -1612,7 +1612,7 @@ SELECT * FROM x, y, a, b, c WHERE something AND somethingelse;
Increase and ; this reduces the frequency
of checkpoints, but increases the storage requirements of
- /pg_xlog>.
+ /pg_wal>.
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 3384e73448..50cf527427 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1947,7 +1947,7 @@ The commands accepted in walsender mode are:
Include the necessary WAL segments in the backup. This will include
all the files between start and stop backup in the
- pg_xlog directory of the base directory tar
+ pg_wal directory of the base directory tar
file.
@@ -2076,8 +2076,8 @@ The commands accepted in walsender mode are:
- pg_xlog>, including subdirectories. If the backup is run
- with WAL files included, a synthesized version of pg_xlog will be
+ pg_wal>, including subdirectories. If the backup is run
+ with WAL files included, a synthesized version of pg_wal will be
included, but it will only contain the files necessary for the
backup to work, not the rest of the contents.
diff --git a/doc/src/sgml/ref/pg_resetxlog.sgml b/doc/src/sgml/ref/pg_resetxlog.sgml
index fd9d0be6f4..c949c5e849 100644
--- a/doc/src/sgml/ref/pg_resetxlog.sgml
+++ b/doc/src/sgml/ref/pg_resetxlog.sgml
@@ -173,22 +173,22 @@ PostgreSQL documentation
The WAL starting address should be
larger than any WAL segment file name currently existing in
- the directory pg_xlog> under the data directory.
+ the directory pg_wal> under the data directory.
These names are also in hexadecimal and have three parts. The first
part is the timeline ID> and should usually be kept the same.
For example, if 00000001000000320000004A> is the
- largest entry in pg_xlog>, use -l 00000001000000320000004B> or higher.
+ largest entry in pg_wal>, use -l 00000001000000320000004B> or higher.
pg_resetxlog itself looks at the files in
- pg_xlog> and chooses a default -l> setting
+ pg_wal> and chooses a default -l> setting
beyond the last existing file name. Therefore, manual adjustment of
-l> should only be needed if you are aware of WAL segment
- files that are not currently present in pg_xlog>, such as
+ files that are not currently present in pg_wal>, such as
entries in an offline archive; or if the contents of
- pg_xlog> have been lost entirely.
+ pg_wal> have been lost entirely.
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 42ebfbfdef..371c4a475f 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -61,14 +61,14 @@ PostgreSQL documentation
pg_rewind> examines the timeline histories of the source
and target clusters to determine the point where they diverged, and
- expects to find WAL in the target cluster's pg_xlog> directory
+ expects to find WAL in the target cluster's pg_wal> directory
reaching all the way back to the point of divergence. The point of divergence
can be found either on the target timeline, the source timeline, or their common
ancestor. In the typical failover scenario where the target cluster was
shut down soon after the divergence, this is not a problem, but if the
target cluster ran for a long time after the divergence, the old WAL
files might no longer be present. In that case, they can be manually
- copied from the WAL archive to the pg_xlog> directory, or
+ copied from the WAL archive to the pg_wal> directory, or
fetched on startup by configuring recovery.conf>. The use of
pg_rewind> is not limited to failover, e.g. a standby
server can be promoted, run some write transactions, and then rewound
diff --git a/doc/src/sgml/ref/pg_xlogdump.sgml b/doc/src/sgml/ref/pg_xlogdump.sgml
index 177caab00d..cfb6d87259 100644
--- a/doc/src/sgml/ref/pg_xlogdump.sgml
+++ b/doc/src/sgml/ref/pg_xlogdump.sgml
@@ -118,7 +118,7 @@ PostgreSQL documentation
Directory in which to find log segment files. The default is to search
- for them in the pg_xlog subdirectory of the current
+ for them in the pg_wal subdirectory of the current
directory.
diff --git a/doc/src/sgml/ref/pgtestfsync.sgml b/doc/src/sgml/ref/pgtestfsync.sgml
index 6e134c75df..5856356b42 100644
--- a/doc/src/sgml/ref/pgtestfsync.sgml
+++ b/doc/src/sgml/ref/pgtestfsync.sgml
@@ -57,8 +57,8 @@
Specifies the file name to write test data in.
This file should be in the same file system that the
- pg_xlog> directory is or will be placed in.
- (pg_xlog> contains the WAL> files.)
+ pg_wal> directory is or will be placed in.
+ (pg_wal> contains the WAL> files.)
The default is pg_test_fsync.out> in the current
directory.
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index d46a730f66..ad28526296 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -345,7 +345,7 @@ NET STOP postgresql-9.0
your old cluster
once you start the new cluster after the upgrade. Link mode also
requires that the old and new cluster data directories be in the
- same file system. (Tablespaces and pg_xlog> can be on
+ same file system. (Tablespaces and pg_wal> can be on
different file systems.) See pg_upgrade --help> for a full
list of options.
@@ -508,7 +508,7 @@ rsync --archive --delete --hard-links --size-only old_pgdata new_pgdata remote_d
If you have tablespaces, you will need to run a similar
rsync> command for each tablespace directory. If you
- have relocated pg_xlog> outside the data directories,
+ have relocated pg_wal> outside the data directories,
rsync> must be run on those directories too.
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index 1b812bd0a9..fddb69bad3 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -141,7 +141,7 @@ Item
- pg_xlog>
+ pg_wal> Subdirectory containing WAL (Write Ahead Log) files
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index fe3b588c72..346aa769a8 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -557,7 +557,7 @@
- The number of WAL segment files in pg_xlog> directory depends on
+ The number of WAL segment files in pg_wal> directory depends on
min_wal_size>, max_wal_size> and
the amount of WAL generated in previous checkpoint cycles. When old log
segment files are no longer needed, they are removed or recycled (that is,
@@ -582,7 +582,7 @@
kept at all times. Also, if WAL archiving is used, old segments can not be
removed or recycled until they are archived. If WAL archiving cannot keep up
with the pace that WAL is generated, or if archive_command
- fails repeatedly, old WAL files will accumulate in pg_xlog>
+ fails repeatedly, old WAL files will accumulate in pg_wal>
until the situation is resolved. A slow or failed standby server that
uses a replication slot will have the same effect (see
).
@@ -594,7 +594,7 @@
which are similar to checkpoints in normal operation: the server forces
all its state to disk, updates the pg_control> file to
indicate that the already-processed WAL data need not be scanned again,
- and then recycles any old log segment files in the pg_xlog>
+ and then recycles any old log segment files in the pg_wal>
directory.
Restartpoints can't be performed more frequently than checkpoints in the
master because restartpoints can only be performed at checkpoint records.
@@ -750,7 +750,7 @@
WAL logs are stored in the directory
- pg_xlog under the data directory, as a set of
+ pg_wal under the data directory, as a set of
segment files, normally each 16 MB in size (but the size can be changed
by altering the
--with-wal-segsize> configure option when
building the server). Each segment is divided into pages, normally
@@ -767,7 +767,7 @@
It is advantageous if the log is located on a different disk from the
main database files. This can be achieved by moving the
- pg_xlog directory to another location (while the server
+ pg_wal directory to another location (while the server
is shut down, of course) and creating a symbolic link from the
original location in the main data directory to the new location.
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index bd91573708..43436a97a9 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -43,7 +43,7 @@
/*
* Copies all timeline history files with id's between 'begin' and 'end'
- * from archive to pg_xlog.
+ * from archive to pg_wal.
*/
void
restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
@@ -191,7 +191,7 @@ readTimeLineHistory(TimeLineID targetTLI)
result = lcons(entry, result);
/*
- * If the history file was fetched from archive, save it in pg_xlog for
+ * If the history file was fetched from archive, save it in pg_wal for
* future reference.
*/
if (fromArchive)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 08c87f91be..4fda1798cc 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -235,9 +235,9 @@ static int LocalXLogInsertAllowed = -1;
* valid in the startup process.
*
* When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
- * currently performing crash recovery using only XLOG files in pg_xlog, but
+ * currently performing crash recovery using only XLOG files in pg_wal, but
* will switch to using offline XLOG archives as soon as we reach the end of
- * WAL in pg_xlog.
+ * WAL in pg_wal.
*/
bool ArchiveRecoveryRequested = false;
bool InArchiveRecovery = false;
@@ -700,12 +700,12 @@ typedef enum
{
XLOG_FROM_ANY = 0, /* request to read WAL from any source */
XLOG_FROM_ARCHIVE, /* restored using restore_command */
- XLOG_FROM_PG_XLOG, /* existing file in pg_xlog */
+ XLOG_FROM_PG_WAL, /* existing file in pg_wal */
XLOG_FROM_STREAM /* streamed from master */
} XLogSource;
/* human-readable names for XLogSources, for debugging output */
-static const char *xlogSourceNames[] = {"any", "archive", "pg_xlog", "stream"};
+static const char *xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};
/*
* openLogFile is -1 or a kernel FD for an open log file segment.
@@ -3345,7 +3345,7 @@ XLogFileOpen(XLogSegNo segno)
* Open a logfile segment for reading (during recovery).
*
* If source == XLOG_FROM_ARCHIVE, the segment is retrieved from archive.
- * Otherwise, it's assumed to be already available in pg_xlog.
+ * Otherwise, it's assumed to be already available in pg_wal.
*/
static int
XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
@@ -3374,7 +3374,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
return -1;
break;
- case XLOG_FROM_PG_XLOG:
+ case XLOG_FROM_PG_WAL:
case XLOG_FROM_STREAM:
XLogFilePath(path, tli, segno);
restoredFromArchive = false;
@@ -3393,7 +3393,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
KeepFileRestoredFromArchive(path, xlogfname);
/*
- * Set path to point at the new file in pg_xlog.
+ * Set path to point at the new file in pg_wal.
*/
snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
}
@@ -3481,10 +3481,10 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source)
}
}
- if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_XLOG)
+ if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_WAL)
{
fd = XLogFileRead(segno, emode, tli,
- XLOG_FROM_PG_XLOG, true);
+ XLOG_FROM_PG_WAL, true);
if (fd != -1)
{
if (!expectedTLEs)
@@ -3693,10 +3693,10 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
*
* This is called during recovery, whenever we switch to follow a new
* timeline, and at the end of recovery when we create a new timeline. We
- * wouldn't otherwise care about extra WAL files lying in pg_xlog, but they
+ * wouldn't otherwise care about extra WAL files lying in pg_wal, but they
* might be leftover pre-allocated or recycled WAL segments on the old timeline
* that we haven't used yet, and contain garbage. If we just leave them in
- * pg_xlog, they will eventually be archived, and we can't let that happen.
+ * pg_wal, they will eventually be archived, and we can't let that happen.
* Files that belong to our timeline history are valid, because we have
* successfully replayed them, but from others we can't be sure.
*
@@ -3853,15 +3853,15 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
}
/*
- * Verify whether pg_xlog and pg_xlog/archive_status exist.
+ * Verify whether pg_wal and pg_wal/archive_status exist.
* If the latter does not exist, recreate it.
*
* It is not the goal of this function to verify the contents of these
* directories, but to help in cases where someone has performed a cluster
- * copy for PITR purposes but omitted pg_xlog from the copy.
+ * copy for PITR purposes but omitted pg_wal from the copy.
*
- * We could also recreate pg_xlog if it doesn't exist, but a deliberate
- * policy decision was made not to. It is fairly common for pg_xlog to be
+ * We could also recreate pg_wal if it doesn't exist, but a deliberate
+ * policy decision was made not to. It is fairly common for pg_wal to be
* a symlink, and if that was the DBA's intent then automatically making a
* plain directory would result in degraded performance with no notice.
*/
@@ -3871,7 +3871,7 @@ ValidateXLOGDirectoryStructure(void)
char path[MAXPGPATH];
struct stat stat_buf;
- /* Check for pg_xlog; if it doesn't exist, error out */
+ /* Check for pg_wal; if it doesn't exist, error out */
if (stat(XLOGDIR, &stat_buf) != 0 ||
!S_ISDIR(stat_buf.st_mode))
ereport(FATAL,
@@ -4027,11 +4027,11 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
* If archive recovery was requested, but we were still doing
* crash recovery, switch to archive recovery and retry using the
* offline archive. We have now replayed all the valid WAL in
- * pg_xlog, so we are presumably now consistent.
+ * pg_wal, so we are presumably now consistent.
*
* We require that there's at least some valid WAL present in
- * pg_xlog, however (!fetch_ckpt). We could recover using the WAL
- * from the archive, even if pg_xlog is completely empty, but we'd
+ * pg_wal, however (!fetch_ckpt). We could recover using the WAL
+ * from the archive, even if pg_wal is completely empty, but we'd
* have no idea how far we'd have to replay to reach consistency.
* So err on the safe side and give up.
*/
@@ -4039,7 +4039,7 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode,
!fetching_ckpt)
{
ereport(DEBUG1,
- (errmsg_internal("reached end of WAL in pg_xlog, entering archive recovery")));
+ (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
InArchiveRecovery = true;
if (StandbyModeRequested)
StandbyMode = true;
@@ -4156,7 +4156,7 @@ rescanLatestTimeLine(void)
/*
* As in StartupXLOG(), try to ensure we have all the history files
- * between the old target and new target in pg_xlog.
+ * between the old target and new target in pg_wal.
*/
restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
@@ -5189,7 +5189,7 @@ readRecoveryCommandFile(void)
ereport(WARNING,
(errmsg("recovery command file \"%s\" specified neither primary_conninfo nor restore_command",
RECOVERY_COMMAND_FILE),
- errhint("The database server will regularly poll the pg_xlog subdirectory to check for files placed there.")));
+ errhint("The database server will regularly poll the pg_wal subdirectory to check for files placed there.")));
}
else
{
@@ -6056,7 +6056,7 @@ StartupXLOG(void)
#endif
/*
- * Verify that pg_xlog and pg_xlog/archive_status exist. In cases where
+ * Verify that pg_wal and pg_wal/archive_status exist. In cases where
* someone has performed a copy for PITR, these directories may have been
* excluded and need to be re-created.
*/
@@ -6269,7 +6269,7 @@ StartupXLOG(void)
* and put it into archive recovery by creating a recovery.conf file.
*
* Our strategy in that case is to perform crash recovery first,
- * replaying all the WAL present in pg_xlog, and only enter archive
+ * replaying all the WAL present in pg_wal, and only enter archive
* recovery after that.
*
* But usually we already know how far we need to replay the WAL (up
@@ -6473,7 +6473,7 @@ StartupXLOG(void)
/*
* Copy any missing timeline history files between 'now' and the recovery
- * target timeline from archive to pg_xlog. While we don't need those
+ * target timeline from archive to pg_wal. While we don't need those
* files ourselves - the history file of the recovery target timeline
* covers all the previous timelines in the history too - a cascading
* standby server might be interested in them. Or, if you archive the WAL
@@ -7094,7 +7094,7 @@ StartupXLOG(void)
/*
* We are now done reading the xlog from stream. Turn off streaming
* recovery to force fetching the files (which would be required at end of
- * recovery, e.g., timeline history file) from archive or pg_xlog.
+ * recovery, e.g., timeline history file) from archive or pg_wal.
*/
StandbyMode = false;
@@ -7382,7 +7382,7 @@ StartupXLOG(void)
* As a compromise, we rename the last segment with the .partial
* suffix, and archive it. Archive recovery will never try to read
* .partial segments, so they will normally go unused. But in the odd
- * PITR case, the administrator can copy them manually to the pg_xlog
+ * PITR case, the administrator can copy them manually to the pg_wal
* directory (removing the suffix). They can be useful in debugging,
* too.
*
@@ -9958,7 +9958,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
* first WAL segment containing the startup checkpoint has pages in
* the beginning with the old timeline ID. That can cause trouble at
* recovery: we won't have a history file covering the old timeline if
- * pg_xlog directory was not included in the base backup and the WAL
+ * pg_wal directory was not included in the base backup and the WAL
* archive was cleared too before starting the backup.
*
* This also ensures that we have emitted a WAL page header that has
@@ -10605,7 +10605,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
* archived before returning. If archiving isn't enabled, the required WAL
* needs to be transported via streaming replication (hopefully with
* wal_keep_segments set high enough), or some more exotic mechanism like
- * polling and copying files from pg_xlog with script. We have no
+ * polling and copying files from pg_wal with a script. We have no
* knowledge of those mechanisms, so it's up to the user to ensure that he
* gets all the required WAL.
*
@@ -11195,9 +11195,9 @@ next_record_is_invalid:
* Open the WAL segment containing WAL position 'RecPtr'.
*
* The segment can be fetched via restore_command, or via walreceiver having
- * streamed the record, or it can already be present in pg_xlog. Checking
- * pg_xlog is mainly for crash recovery, but it will be polled in standby mode
- * too, in case someone copies a new segment directly to pg_xlog. That is not
+ * streamed the record, or it can already be present in pg_wal. Checking
+ * pg_wal is mainly for crash recovery, but it will be polled in standby mode
+ * too, in case someone copies a new segment directly to pg_wal. That is not
* documented or recommended, though.
*
* If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
@@ -11227,8 +11227,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
/*-------
* Standby mode is implemented by a state machine:
*
- * 1. Read from either archive or pg_xlog (XLOG_FROM_ARCHIVE), or just
- * pg_xlog (XLOG_FROM_XLOG)
+ * 1. Read from either archive or pg_wal (XLOG_FROM_ARCHIVE), or just
+ * pg_wal (XLOG_FROM_PG_WAL)
* 2. Check trigger file
* 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
* 4. Rescan timelines
@@ -11244,7 +11244,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
*-------
*/
if (!InArchiveRecovery)
- currentSource = XLOG_FROM_PG_XLOG;
+ currentSource = XLOG_FROM_PG_WAL;
else if (currentSource == 0)
currentSource = XLOG_FROM_ARCHIVE;
@@ -11263,13 +11263,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
switch (currentSource)
{
case XLOG_FROM_ARCHIVE:
- case XLOG_FROM_PG_XLOG:
+ case XLOG_FROM_PG_WAL:
/*
* Check to see if the trigger file exists. Note that we
* do this only after failure, so when you create the
* trigger file, we still finish replaying as much as we
- * can from archive and pg_xlog before failover.
+ * can from archive and pg_wal before failover.
*/
if (StandbyMode && CheckForStandbyTrigger())
{
@@ -11279,7 +11279,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
/*
* Not in standby mode, and we've now tried the archive
- * and pg_xlog.
+ * and pg_wal.
*/
if (!StandbyMode)
return false;
@@ -11339,7 +11339,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* little chance that the problem will just go away, but
* PANIC is not good for availability either, especially
* in hot standby mode. So, we treat that the same as
- * disconnection, and retry from archive/pg_xlog again.
+ * disconnection, and retry from archive/pg_wal again.
* The WAL in the archive should be identical to what was
* streamed, so it's unlikely that it helps, but one can
* hope...
@@ -11400,11 +11400,11 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
elog(ERROR, "unexpected WAL source %d", currentSource);
}
}
- else if (currentSource == XLOG_FROM_PG_XLOG)
+ else if (currentSource == XLOG_FROM_PG_WAL)
{
/*
- * We just successfully read a file in pg_xlog. We prefer files in
- * the archive over ones in pg_xlog, so try the next file again
+ * We just successfully read a file in pg_wal. We prefer files in
+ * the archive over ones in pg_wal, so try the next file again
* from the archive first.
*/
if (InArchiveRecovery)
@@ -11425,7 +11425,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
switch (currentSource)
{
case XLOG_FROM_ARCHIVE:
- case XLOG_FROM_PG_XLOG:
+ case XLOG_FROM_PG_WAL:
/* Close any old file we might have open. */
if (readFile >= 0)
{
@@ -11438,7 +11438,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
/*
* Try to restore the file from archive, or read an existing
- * file from pg_xlog.
+ * file from pg_wal.
*/
readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
@@ -11447,7 +11447,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
return true; /* success! */
/*
- * Nope, not found in archive or pg_xlog.
+ * Nope, not found in archive or pg_wal.
*/
lastSourceFailed = true;
break;
@@ -11503,7 +11503,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* not open already. Also read the timeline history
* file if we haven't initialized timeline history
* yet; it should be streamed over and present in
- * pg_xlog by now. Use XLOG_FROM_STREAM so that
+ * pg_wal by now. Use XLOG_FROM_STREAM so that
* source info is set correctly and XLogReceiptTime
* isn't changed.
*/
@@ -11535,10 +11535,10 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
/*
* Note that we don't "return false" immediately here.
* After being triggered, we still want to replay all
- * the WAL that was already streamed. It's in pg_xlog
+ * the WAL that was already streamed. It's in pg_wal
* now, so we just treat this as a failure, and the
* state machine will move on to replay the streamed
- * WAL from pg_xlog, and then recheck the trigger and
+ * WAL from pg_wal, and then recheck the trigger and
* exit replay.
*/
lastSourceFailed = true;
@@ -11578,7 +11578,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* or legitimate end-of-WAL situation. Generally, we use it as-is, but if
* we're retrying the exact same record that we've tried previously, only
* complain the first time to keep the noise down. However, we only do so when
- * reading from pg_xlog, because we don't expect any invalid records in archive
+ * reading from pg_wal, because we don't expect any invalid records in archive
* or in records streamed from master. Files in the archive should be complete,
* and we should never hit the end of WAL because we stop and wait for more WAL
* to arrive before replaying it.
@@ -11593,7 +11593,7 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
{
static XLogRecPtr lastComplaint = 0;
- if (readSource == XLOG_FROM_PG_XLOG && emode == LOG)
+ if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
{
if (RecPtr == lastComplaint)
emode = DEBUG1;
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index d153a44ea9..b919164bcb 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -421,7 +421,7 @@ ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
/*
* A file was restored from the archive under a temporary filename (path),
* and now we want to keep it. Rename it under the permanent filename in
- * in pg_xlog (xlogfname), replacing any existing file with the same name.
+ * pg_wal (xlogfname), replacing any existing file with the same name.
*/
void
KeepFileRestoredFromArchive(char *path, char *xlogfname)
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 33383b4dcc..01cbd90f40 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -128,7 +128,7 @@ pg_start_backup(PG_FUNCTION_ARGS)
* pg_stop_backup: finish taking an on-line backup dump
*
* We write an end-of-backup WAL record, and remove the backup label file
- * created by pg_start_backup, creating a backup history file in pg_xlog
+ * created by pg_start_backup, creating a backup history file in pg_wal
* instead (whence it will immediately be archived). The backup history file
* contains the same info found in the label file, plus the backup-end time
* and WAL location. Before 9.0, the backup-end time was read from the backup
diff --git a/src/backend/replication/README b/src/backend/replication/README
index ad4864dbbe..0cbb990613 100644
--- a/src/backend/replication/README
+++ b/src/backend/replication/README
@@ -54,7 +54,7 @@ and WalRcvData->slotname, and initializes the starting point in
WalRcvData->receiveStart.
As walreceiver receives WAL from the master server, and writes and flushes
-it to disk (in pg_xlog), it updates WalRcvData->receivedUpto and signals
+it to disk (in pg_wal), it updates WalRcvData->receivedUpto and signals
the startup process to know how far WAL replay can advance.
Walreceiver sends information about replication progress to the master server
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index fa75930c9f..ffc7e586db 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -346,7 +346,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
TimeLineID tli;
/*
- * I'd rather not worry about timelines here, so scan pg_xlog and
+ * I'd rather not worry about timelines here, so scan pg_wal and
* include all WAL files in the range between 'startptr' and 'endptr',
* regardless of the timeline the file is stamped with. If there are
* some spurious WAL files belonging to timelines that don't belong in
@@ -359,11 +359,11 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
XLByteToPrevSeg(endptr, endsegno);
XLogFileName(lastoff, ThisTimeLineID, endsegno);
- dir = AllocateDir("pg_xlog");
+ dir = AllocateDir("pg_wal");
if (!dir)
ereport(ERROR,
- (errmsg("could not open directory \"%s\": %m", "pg_xlog")));
- while ((de = ReadDir(dir, "pg_xlog")) != NULL)
+ (errmsg("could not open directory \"%s\": %m", "pg_wal")));
+ while ((de = ReadDir(dir, "pg_wal")) != NULL)
{
/* Does it look like a WAL segment, and is it in the range? */
if (IsXLogFileName(de->d_name) &&
@@ -401,7 +401,7 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
/*
- * There must be at least one xlog file in the pg_xlog directory,
+ * There must be at least one xlog file in the pg_wal directory,
* since we are doing backup-including-xlog.
*/
if (nWalFiles < 1)
@@ -1054,23 +1054,23 @@ sendDir(char *path, int basepathlen, bool sizeonly, List *tablespaces,
}
/*
- * We can skip pg_xlog, the WAL segments need to be fetched from the
+ * We can skip pg_wal, the WAL segments need to be fetched from the
* WAL archive anyway. But include it as an empty directory anyway, so
* we get permissions right.
*/
- if (strcmp(pathbuf, "./pg_xlog") == 0)
+ if (strcmp(pathbuf, "./pg_wal") == 0)
{
- /* If pg_xlog is a symlink, write it as a directory anyway */
+ /* If pg_wal is a symlink, write it as a directory anyway */
size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
/*
* Also send archive_status directory (by hackishly reusing
* statbuf from above ...).
*/
- size += _tarWriteHeader("./pg_xlog/archive_status", NULL, &statbuf,
+ size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
sizeonly);
- continue; /* don't recurse into pg_xlog */
+ continue; /* don't recurse into pg_wal */
}
/* Allow symbolic links in pg_tblspc only */
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index eed6effeeb..2bb3dce1b1 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -18,7 +18,7 @@
* If the primary server ends streaming, but doesn't disconnect, walreceiver
* goes into "waiting" mode, and waits for the startup process to give new
* instructions. The startup process will treat that the same as
- * disconnection, and will rescan the archive/pg_xlog directory. But when the
+ * disconnection, and will rescan the archive/pg_wal directory. But when the
* startup process wants to try streaming replication again, it will just
* nudge the existing walreceiver process that's waiting, instead of launching
* a new one.
@@ -365,7 +365,7 @@ WalReceiverMain(void)
* we've already reached the end of the old timeline, the server will
* finish the streaming immediately, and we will go back to await
* orders from the startup process. If recovery_target_timeline is
- * 'latest', the startup process will scan pg_xlog and find the new
+ * 'latest', the startup process will scan pg_wal and find the new
* history file, bump recovery target timeline, and ask us to restart
* on the new timeline.
*/
@@ -742,7 +742,7 @@ WalRcvFetchTimeLineHistoryFiles(TimeLineID first, TimeLineID last)
tli)));
/*
- * Write the file to pg_xlog.
+ * Write the file to pg_wal.
*/
writeTimeLineHistoryFile(tli, content, len);
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 0f3ced250c..bc5e50807a 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -586,7 +586,7 @@ StartReplication(StartReplicationCmd *cmd)
* segment that contains switchpoint, but on the new timeline, so
* that it doesn't end up with a partial segment. If you ask for a
* too old starting point, you'll get an error later when we fail
- * to find the requested WAL segment in pg_xlog.
+ * to find the requested WAL segment in pg_wal.
*
* XXX: we could be more strict here and only allow a startpoint
* that's older than the switchpoint, if it's still in the same
@@ -2058,7 +2058,7 @@ retry:
*
* For example, imagine that this server is currently on timeline
* 5, and we're streaming timeline 4. The switch from timeline 4
- * to 5 happened at 0/13002088. In pg_xlog, we have these files:
+ * to 5 happened at 0/13002088. In pg_wal, we have these files:
*
* ...
* 000000040000000000000012
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 03143f1133..b7ff5efe02 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -2787,7 +2787,7 @@ looks_like_temp_rel_name(const char *name)
* Issue fsync recursively on PGDATA and all its contents.
*
* We fsync regular files and directories wherever they are, but we
- * follow symlinks only for pg_xlog and immediately under pg_tblspc.
+ * follow symlinks only for pg_wal and immediately under pg_tblspc.
* Other symlinks are presumed to point at files we're not responsible
* for fsyncing, and might not have privileges to write at all.
*
@@ -2811,7 +2811,7 @@ SyncDataDirectory(void)
return;
/*
- * If pg_xlog is a symlink, we'll need to recurse into it separately,
+ * If pg_wal is a symlink, we'll need to recurse into it separately,
* because the first walkdir below will ignore it.
*/
xlog_is_symlink = false;
@@ -2820,16 +2820,16 @@ SyncDataDirectory(void)
{
struct stat st;
- if (lstat("pg_xlog", &st) < 0)
+ if (lstat("pg_wal", &st) < 0)
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m",
- "pg_xlog")));
+ "pg_wal")));
else if (S_ISLNK(st.st_mode))
xlog_is_symlink = true;
}
#else
- if (pgwin32_is_junction("pg_xlog"))
+ if (pgwin32_is_junction("pg_wal"))
xlog_is_symlink = true;
#endif
@@ -2841,7 +2841,7 @@ SyncDataDirectory(void)
#ifdef PG_FLUSH_DATA_WORKS
walkdir(".", pre_sync_fname, false, DEBUG1);
if (xlog_is_symlink)
- walkdir("pg_xlog", pre_sync_fname, false, DEBUG1);
+ walkdir("pg_wal", pre_sync_fname, false, DEBUG1);
walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
#endif
@@ -2849,14 +2849,14 @@ SyncDataDirectory(void)
* Now we do the fsync()s in the same order.
*
* The main call ignores symlinks, so in addition to specially processing
- * pg_xlog if it's a symlink, pg_tblspc has to be visited separately with
+ * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
* process_symlinks = true. Note that if there are any plain directories
* in pg_tblspc, they'll get fsync'd twice. That's not an expected case
* so we don't worry about optimizing it.
*/
walkdir(".", datadir_fsync_fname, false, LOG);
if (xlog_is_symlink)
- walkdir("pg_xlog", datadir_fsync_fname, false, LOG);
+ walkdir("pg_wal", datadir_fsync_fname, false, LOG);
walkdir("pg_tblspc", datadir_fsync_fname, true, LOG);
}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 9e23f64130..c8a8c52c3d 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -195,7 +195,7 @@ static const char *backend_options = "--single -F -O -j -c search_path=pg_catalo
static const char *const subdirs[] = {
"global",
- "pg_xlog/archive_status",
+ "pg_wal/archive_status",
"pg_clog",
"pg_commit_ts",
"pg_dynshmem",
@@ -2091,8 +2091,6 @@ make_postgres(FILE *cmdfd)
PG_CMD_PUTS(*line);
}
-
-
/*
* signal handler in case we are interrupted.
*
@@ -2830,7 +2828,7 @@ create_xlog_or_symlink(void)
char *subdirloc;
/* form name of the place for the subdirectory or symlink */
- subdirloc = psprintf("%s/pg_xlog", pg_data);
+ subdirloc = psprintf("%s/pg_wal", pg_data);
if (strcmp(xlog_dir, "") != 0)
{
@@ -2963,7 +2961,7 @@ initialize_data_directory(void)
create_xlog_or_symlink();
- /* Create required subdirectories (other than pg_xlog) */
+ /* Create required subdirectories (other than pg_wal) */
printf(_("creating subdirectories ... "));
fflush(stdout);
@@ -3260,7 +3258,7 @@ main(int argc, char *argv[])
fputs(_("syncing data to disk ... "), stdout);
fflush(stdout);
- fsync_pgdata(pg_data, progname);
+ fsync_pgdata(pg_data, progname, PG_VERSION_NUM);
check_ok();
return 0;
}
@@ -3326,7 +3324,7 @@ main(int argc, char *argv[])
{
fputs(_("syncing data to disk ... "), stdout);
fflush(stdout);
- fsync_pgdata(pg_data, progname);
+ fsync_pgdata(pg_data, progname, PG_VERSION_NUM);
check_ok();
}
else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 76e8f449fe..b82b8e1b26 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -55,6 +55,12 @@ typedef struct TablespaceList
TablespaceListCell *tail;
} TablespaceList;
+/*
+ * pg_xlog has been renamed to pg_wal in version 10. This version number
+ * should be compared with PQserverVersion().
+ */
+#define MINIMUM_VERSION_FOR_PG_WAL 100000
+
/* Global options */
static char *basedir = NULL;
static TablespaceList tablespace_dirs = {NULL, NULL};
@@ -526,15 +532,22 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
/* Error message already written in GetConnection() */
exit(1);
- snprintf(param->xlogdir, sizeof(param->xlogdir), "%s/pg_xlog", basedir);
+ /* In post-10 clusters, pg_xlog has been renamed to pg_wal */
+ snprintf(param->xlogdir, sizeof(param->xlogdir), "%s/%s",
+ basedir,
+ PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
+ "pg_xlog" : "pg_wal");
/*
- * Create pg_xlog/archive_status (and thus pg_xlog) so we can write to
- * basedir/pg_xlog as the directory entry in the tar file may arrive
- * later.
+ * Create pg_wal/archive_status or pg_xlog/archive_status (and thus
+ * pg_wal or pg_xlog) depending on the target server so we can write to
+ * basedir/pg_wal or basedir/pg_xlog as the directory entry in the tar
+ * file may arrive later.
*/
- snprintf(statusdir, sizeof(statusdir), "%s/pg_xlog/archive_status",
- basedir);
+ snprintf(statusdir, sizeof(statusdir), "%s/%s/archive_status",
+ basedir,
+ PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
+ "pg_xlog" : "pg_wal");
if (pg_mkdir_p(statusdir, S_IRWXU) != 0 && errno != EEXIST)
{
@@ -1338,15 +1351,17 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum)
if (mkdir(filename, S_IRWXU) != 0)
{
/*
- * When streaming WAL, pg_xlog will have been created
- * by the wal receiver process. Also, when transaction
- * log directory location was specified, pg_xlog has
- * already been created as a symbolic link before
- * starting the actual backup. So just ignore creation
- * failures on related directories.
+ * When streaming WAL, pg_wal (or pg_xlog for pre-9.6
+ * clusters) will have been created by the wal receiver
+ * process. Also, when transaction log directory location
+ * was specified, pg_wal (or pg_xlog) has already been
+ * created as a symbolic link before starting the actual
+ * backup. So just ignore creation failures on related
+ * directories.
*/
- if (!((pg_str_endswith(filename, "/pg_xlog") ||
- pg_str_endswith(filename, "/archive_status")) &&
+ if (!((pg_str_endswith(filename, "/pg_wal") ||
+ pg_str_endswith(filename, "/pg_xlog")||
+ pg_str_endswith(filename, "/archive_status")) &&
errno == EEXIST))
{
fprintf(stderr,
@@ -1634,15 +1649,10 @@ BaseBackup(void)
char xlogend[64];
int minServerMajor,
maxServerMajor;
- int serverMajor;
+ int serverVersion,
+ serverMajor;
- /*
- * Connect in replication mode to the server
- */
- conn = GetConnection();
- if (!conn)
- /* Error message already written in GetConnection() */
- exit(1);
+ Assert(conn != NULL);
/*
* Check server version. BASE_BACKUP command was introduced in 9.1, so we
@@ -1650,7 +1660,8 @@ BaseBackup(void)
*/
minServerMajor = 901;
maxServerMajor = PG_VERSION_NUM / 100;
- serverMajor = PQserverVersion(conn) / 100;
+ serverVersion = PQserverVersion(conn);
+ serverMajor = serverVersion / 100;
if (serverMajor < minServerMajor || serverMajor > maxServerMajor)
{
const char *serverver = PQparameterStatus(conn, "server_version");
@@ -1979,7 +1990,7 @@ BaseBackup(void)
}
else
{
- (void) fsync_pgdata(basedir, progname);
+ (void) fsync_pgdata(basedir, progname, serverVersion);
}
}
@@ -2296,6 +2307,14 @@ main(int argc, char **argv)
if (format == 'p' || strcmp(basedir, "-") != 0)
verify_dir_is_empty_or_create(basedir, &made_new_pgdata, &found_existing_pgdata);
+ /* connection in replication mode to server */
+ conn = GetConnection();
+ if (!conn)
+ {
+ /* Error message already written in GetConnection() */
+ exit(1);
+ }
+
/* Create transaction log symlink, if required */
if (strcmp(xlog_dir, "") != 0)
{
@@ -2303,19 +2322,24 @@ main(int argc, char **argv)
verify_dir_is_empty_or_create(xlog_dir, &made_new_xlogdir, &found_existing_xlogdir);
- /* form name of the place where the symlink must go */
- linkloc = psprintf("%s/pg_xlog", basedir);
+ /*
+ * Form name of the place where the symlink must go. pg_xlog has
+ * been renamed to pg_wal in post-10 clusters.
+ */
+ linkloc = psprintf("%s/%s", basedir,
+ PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
+ "pg_xlog" : "pg_wal");
#ifdef HAVE_SYMLINK
if (symlink(xlog_dir, linkloc) != 0)
{
fprintf(stderr, _("%s: could not create symbolic link \"%s\": %s\n"),
progname, linkloc, strerror(errno));
- exit(1);
+ disconnect_and_exit(1);
}
#else
fprintf(stderr, _("%s: symlinks are not supported on this platform\n"));
- exit(1);
+ disconnect_and_exit(1);
#endif
free(linkloc);
}
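The whole cross-version story in this file reduces to one comparison: the integer from PQserverVersion() against MINIMUM_VERSION_FOR_PG_WAL (100000, i.e. version 10.0). A self-contained sketch of that gate, with wal_dir_name as a hypothetical helper name:

    #include <stdio.h>

    #define MINIMUM_VERSION_FOR_PG_WAL 100000   /* same value as the patch */

    /* server_version in PQserverVersion() form: 90603 = 9.6.3, 100001 = 10.1 */
    static const char *
    wal_dir_name(int server_version)
    {
        return server_version < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal";
    }

    int
    main(void)
    {
        printf("%s\n", wal_dir_name(90603));    /* pg_xlog */
        printf("%s\n", wal_dir_name(100001));   /* pg_wal */
        return 0;
    }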
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index fcedfed2b2..579d7a15fb 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -67,9 +67,9 @@ $node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup" ],
'pg_basebackup runs');
ok(-f "$tempdir/backup/PG_VERSION", 'backup was created');
-# Only archive_status directory should be copied in pg_xlog/.
+# Only archive_status directory should be copied in pg_wal/.
is_deeply(
- [ sort(slurp_dir("$tempdir/backup/pg_xlog/")) ],
+ [ sort(slurp_dir("$tempdir/backup/pg_wal/")) ],
[ sort qw(. .. archive_status) ],
'no WAL files copied');
@@ -230,12 +230,12 @@ like(
$node->command_ok(
[ 'pg_basebackup', '-D', "$tempdir/backupxf", '-X', 'fetch' ],
'pg_basebackup -X fetch runs');
-ok(grep(/^[0-9A-F]{24}$/, slurp_dir("$tempdir/backupxf/pg_xlog")),
+ok(grep(/^[0-9A-F]{24}$/, slurp_dir("$tempdir/backupxf/pg_wal")),
'WAL files copied');
$node->command_ok(
[ 'pg_basebackup', '-D', "$tempdir/backupxs", '-X', 'stream' ],
'pg_basebackup -X stream runs');
-ok(grep(/^[0-9A-F]{24}$/, slurp_dir("$tempdir/backupxf/pg_xlog")),
+ok(grep(/^[0-9A-F]{24}$/, slurp_dir("$tempdir/backupxf/pg_wal")),
'WAL files copied');
$node->command_fails(
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 525b82ba7a..2b76f64079 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -890,7 +890,7 @@ FindEndOfXLOG(void)
newXlogSegNo = ControlFile.checkPointCopy.redo / ControlFile.xlog_seg_size;
/*
- * Scan the pg_xlog directory to find existing WAL segment files. We
+ * Scan the pg_wal directory to find existing WAL segment files. We
* assume any present have been used; in most scenarios this should be
* conservative, because of xlog.c's attempts to pre-create files.
*/
diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c
index 327e7ef541..d0c1586562 100644
--- a/src/bin/pg_rewind/copy_fetch.c
+++ b/src/bin/pg_rewind/copy_fetch.c
@@ -131,10 +131,10 @@ recurse_dir(const char *datadir, const char *parentpath,
/*
* If it's a symlink within pg_tblspc, we need to recurse into it,
* to process all the tablespaces. We also follow a symlink if
- * it's for pg_xlog. Symlinks elsewhere are ignored.
+ * it's for pg_wal. Symlinks elsewhere are ignored.
*/
if ((parentpath && strcmp(parentpath, "pg_tblspc") == 0) ||
- strcmp(path, "pg_xlog") == 0)
+ strcmp(path, "pg_wal") == 0)
recurse_dir(datadir, path, callback);
#else
pg_fatal("\"%s\" is a symbolic link, but symbolic links are not supported on this platform\n",
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 9b00dc1cdc..3905a5d6ce 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -79,11 +79,11 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
return;
/*
- * Pretend that pg_xlog is a directory, even if it's really a symlink. We
+ * Pretend that pg_wal is a directory, even if it's really a symlink. We
* don't want to mess with the symlink itself, nor complain if it's a
* symlink in source but not in target or vice versa.
*/
- if (strcmp(path, "pg_xlog") == 0 && type == FILE_TYPE_SYMLINK)
+ if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
type = FILE_TYPE_DIRECTORY;
/*
@@ -120,7 +120,7 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
switch (type)
{
case FILE_TYPE_DIRECTORY:
- if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_xlog") != 0)
+ if (exists && !S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_wal") != 0)
{
/* it's a directory in source, but not in target. Strange.. */
pg_fatal("\"%s\" is not a directory\n", localpath);
@@ -296,7 +296,7 @@ process_target_file(const char *path, file_type_t type, size_t oldsize,
/*
* Like in process_source_file, pretend that xlog is always a directory.
*/
- if (strcmp(path, "pg_xlog") == 0 && type == FILE_TYPE_SYMLINK)
+ if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
type = FILE_TYPE_DIRECTORY;
key.path = (char *) path;
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index b53591d02a..23ac4e7bb0 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -54,7 +54,7 @@ static int SimpleXLogPageRead(XLogReaderState *xlogreader,
TimeLineID *pageTLI);
/*
- * Read WAL from the datadir/pg_xlog, starting from 'startpoint' on timeline
+ * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
* index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
* the data blocks touched by the WAL records, and return them in a page map.
*/
diff --git a/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl b/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl
index bdcab5688b..12950ea1ca 100644
--- a/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl
+++ b/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl
@@ -1,5 +1,5 @@
#
-# Test pg_rewind when the target's pg_xlog directory is a symlink.
+# Test pg_rewind when the target's pg_wal directory is a symlink.
#
use strict;
use warnings;
@@ -30,10 +30,10 @@ sub run_test
my $test_master_datadir = $node_master->data_dir;
- # turn pg_xlog into a symlink
- print("moving $test_master_datadir/pg_xlog to $master_xlogdir\n");
- move("$test_master_datadir/pg_xlog", $master_xlogdir) or die;
- symlink($master_xlogdir, "$test_master_datadir/pg_xlog") or die;
+ # turn pg_wal into a symlink
+ print("moving $test_master_datadir/pg_wal to $master_xlogdir\n");
+ move("$test_master_datadir/pg_wal", $master_xlogdir) or die;
+ symlink($master_xlogdir, "$test_master_datadir/pg_wal") or die;
RewindTest::start_master();
diff --git a/src/bin/pg_upgrade/exec.c b/src/bin/pg_upgrade/exec.c
index 6d04e5671d..55a6f0dfba 100644
--- a/src/bin/pg_upgrade/exec.c
+++ b/src/bin/pg_upgrade/exec.c
@@ -14,7 +14,7 @@
#include
#include
-static void check_data_dir(const char *pg_data);
+static void check_data_dir(ClusterInfo *cluster);
static void check_bin_dir(ClusterInfo *cluster);
static void validate_exec(const char *dir, const char *cmdName);
@@ -220,9 +220,9 @@ verify_directories(void)
pg_fatal("You must have read and write access in the current directory.\n");
check_bin_dir(&old_cluster);
- check_data_dir(old_cluster.pgdata);
+ check_data_dir(&old_cluster);
check_bin_dir(&new_cluster);
- check_data_dir(new_cluster.pgdata);
+ check_data_dir(&new_cluster);
}
@@ -252,6 +252,32 @@ win32_check_directory_write_permissions(void)
#endif
+/*
+ * check_single_dir()
+ *
+ * Check for the presence of a single directory in PGDATA, and fail if
+ * it is missing or not accessible.
+ */
+static void
+check_single_dir(const char *pg_data, const char *subdir)
+{
+ struct stat statBuf;
+ char subDirName[MAXPGPATH];
+
+ snprintf(subDirName, sizeof(subDirName), "%s%s%s", pg_data,
+ /* Win32 can't stat() a directory with a trailing slash. */
+ *subdir ? "/" : "",
+ subdir);
+
+ if (stat(subDirName, &statBuf) != 0)
+ report_status(PG_FATAL, "check for \"%s\" failed: %s\n",
+ subDirName, strerror(errno));
+ else if (!S_ISDIR(statBuf.st_mode))
+ report_status(PG_FATAL, "%s is not a directory\n",
+ subDirName);
+}
+
+
/*
* check_data_dir()
*
@@ -262,34 +288,27 @@ win32_check_directory_write_permissions(void)
*
*/
static void
-check_data_dir(const char *pg_data)
+check_data_dir(ClusterInfo *cluster)
{
- char subDirName[MAXPGPATH];
- int subdirnum;
-
- /* start check with top-most directory */
- const char *requiredSubdirs[] = {"", "base", "global", "pg_clog",
- "pg_multixact", "pg_subtrans", "pg_tblspc", "pg_twophase",
- "pg_xlog"};
-
- for (subdirnum = 0;
- subdirnum < sizeof(requiredSubdirs) / sizeof(requiredSubdirs[0]);
- ++subdirnum)
- {
- struct stat statBuf;
-
- snprintf(subDirName, sizeof(subDirName), "%s%s%s", pg_data,
- /* Win32 can't stat() a directory with a trailing slash. */
- *requiredSubdirs[subdirnum] ? "/" : "",
- requiredSubdirs[subdirnum]);
-
- if (stat(subDirName, &statBuf) != 0)
- report_status(PG_FATAL, "check for \"%s\" failed: %s\n",
- subDirName, strerror(errno));
- else if (!S_ISDIR(statBuf.st_mode))
- report_status(PG_FATAL, "%s is not a directory\n",
- subDirName);
- }
+ const char *pg_data = cluster->pgdata;
+
+ /* get old and new cluster versions */
+ old_cluster.major_version = get_major_server_version(&old_cluster);
+ new_cluster.major_version = get_major_server_version(&new_cluster);
+
+ check_single_dir(pg_data, "");
+ check_single_dir(pg_data, "base");
+ check_single_dir(pg_data, "global");
+ check_single_dir(pg_data, "pg_multixact");
+ check_single_dir(pg_data, "pg_subtrans");
+ check_single_dir(pg_data, "pg_tblspc");
+ check_single_dir(pg_data, "pg_twophase");
+
+ /* pg_xlog has been renamed to pg_wal in post-10 clusters */
+ if (GET_MAJOR_VERSION(cluster->major_version) < 1000)
+ check_single_dir(pg_data, "pg_xlog");
+ else
+ check_single_dir(pg_data, "pg_wal");
}
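Note that pg_upgrade uses a different encoding than pg_basebackup did above: GET_MAJOR_VERSION() divides the full version number by 100 (per its definition in pg_upgrade.h), so the pg_wal cutoff here is 1000 rather than the 100000 compared against PQserverVersion(). A quick sketch of the two steps of that arithmetic:

    #include <stdio.h>

    #define GET_MAJOR_VERSION(v) ((v) / 100)    /* as in pg_upgrade.h */

    int
    main(void)
    {
        int versions[] = {90603, 100001};       /* 9.6.3 and 10.1 */

        for (int i = 0; i < 2; i++)
        {
            int v = versions[i];

            printf("%6d -> major %4d -> %s\n", v, GET_MAJOR_VERSION(v),
                   GET_MAJOR_VERSION(v) < 1000 ? "pg_xlog" : "pg_wal");
        }
        return 0;
    }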
diff --git a/src/bin/pg_xlogdump/pg_xlogdump.c b/src/bin/pg_xlogdump/pg_xlogdump.c
index 9ad9321e1f..74903980ae 100644
--- a/src/bin/pg_xlogdump/pg_xlogdump.c
+++ b/src/bin/pg_xlogdump/pg_xlogdump.c
@@ -680,7 +680,7 @@ usage(void)
printf(" -f, --follow keep retrying after reaching end of WAL\n");
printf(" -n, --limit=N number of records to display\n");
printf(" -p, --path=PATH directory in which to find log segment files\n");
- printf(" (default: ./pg_xlog)\n");
+ printf(" (default: ./pg_wal)\n");
printf(" -r, --rmgr=RMGR only show records generated by resource manager RMGR\n");
printf(" use --rmgr=list to list valid resource manager names\n");
printf(" -s, --start=RECPTR start reading at log position RECPTR\n");
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 1855e2372c..2fdb4692ea 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -29,6 +29,11 @@
#define PG_FLUSH_DATA_WORKS 1
#endif
+/*
+ * pg_xlog has been renamed to pg_wal in version 10.
+ */
+#define MINIMUM_VERSION_FOR_PG_WAL 100000
+
#ifdef PG_FLUSH_DATA_WORKS
static int pre_sync_fname(const char *fname, bool isdir,
const char *progname);
@@ -40,25 +45,31 @@ static void walkdir(const char *path,
/*
* Issue fsync recursively on PGDATA and all its contents.
*
- * We fsync regular files and directories wherever they are, but we
- * follow symlinks only for pg_xlog and immediately under pg_tblspc.
- * Other symlinks are presumed to point at files we're not responsible
- * for fsyncing, and might not have privileges to write at all.
+ * We fsync regular files and directories wherever they are, but we follow
+ * symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
+ * Other symlinks are presumed to point at files we're not responsible for
+ * fsyncing, and might not have privileges to write at all.
+ *
+ * serverVersion indicates the version of the server to be fsync'd.
*
* Errors are reported but not considered fatal.
*/
void
-fsync_pgdata(const char *pg_data, const char *progname)
+fsync_pgdata(const char *pg_data,
+ const char *progname,
+ int serverVersion)
{
bool xlog_is_symlink;
- char pg_xlog[MAXPGPATH];
+ char pg_wal[MAXPGPATH];
char pg_tblspc[MAXPGPATH];
- snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", pg_data);
+ /* handle renaming of pg_xlog to pg_wal in post-10 clusters */
+ snprintf(pg_wal, MAXPGPATH, "%s/%s", pg_data,
+ serverVersion < MINIMUM_VERSION_FOR_PG_WAL ? "pg_xlog" : "pg_wal");
snprintf(pg_tblspc, MAXPGPATH, "%s/pg_tblspc", pg_data);
/*
- * If pg_xlog is a symlink, we'll need to recurse into it separately,
+ * If pg_wal is a symlink, we'll need to recurse into it separately,
* because the first walkdir below will ignore it.
*/
xlog_is_symlink = false;
@@ -67,14 +78,14 @@ fsync_pgdata(const char *pg_data, const char *progname)
{
struct stat st;
- if (lstat(pg_xlog, &st) < 0)
+ if (lstat(pg_wal, &st) < 0)
fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
- progname, pg_xlog, strerror(errno));
+ progname, pg_wal, strerror(errno));
else if (S_ISLNK(st.st_mode))
xlog_is_symlink = true;
}
#else
- if (pgwin32_is_junction(pg_xlog))
+ if (pgwin32_is_junction(pg_wal))
xlog_is_symlink = true;
#endif
@@ -85,7 +96,7 @@ fsync_pgdata(const char *pg_data, const char *progname)
#ifdef PG_FLUSH_DATA_WORKS
walkdir(pg_data, pre_sync_fname, false, progname);
if (xlog_is_symlink)
- walkdir(pg_xlog, pre_sync_fname, false, progname);
+ walkdir(pg_wal, pre_sync_fname, false, progname);
walkdir(pg_tblspc, pre_sync_fname, true, progname);
#endif
@@ -93,14 +104,14 @@ fsync_pgdata(const char *pg_data, const char *progname)
* Now we do the fsync()s in the same order.
*
* The main call ignores symlinks, so in addition to specially processing
- * pg_xlog if it's a symlink, pg_tblspc has to be visited separately with
+ * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
* process_symlinks = true. Note that if there are any plain directories
* in pg_tblspc, they'll get fsync'd twice. That's not an expected case
* so we don't worry about optimizing it.
*/
walkdir(pg_data, fsync_fname, false, progname);
if (xlog_is_symlink)
- walkdir(pg_xlog, fsync_fname, false, progname);
+ walkdir(pg_wal, fsync_fname, false, progname);
walkdir(pg_tblspc, fsync_fname, true, progname);
}
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 0a595ccc48..ceb0462098 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -128,7 +128,7 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
/*
* The XLog directory and control file (relative to $PGDATA)
*/
-#define XLOGDIR "pg_xlog"
+#define XLOGDIR "pg_wal"
#define XLOG_CONTROL_FILE "global/pg_control"
/*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index bfa6b870a1..cd3048db86 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201610121
+#define CATALOG_VERSION_NO 201610201
#endif
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 1cb263d9e2..b83c398235 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -17,7 +17,8 @@
extern int fsync_fname(const char *fname, bool isdir,
const char *progname);
-extern void fsync_pgdata(const char *pg_data, const char *progname);
+extern void fsync_pgdata(const char *pg_data, const char *progname,
+ int serverVersion);
extern int durable_rename(const char *oldfile, const char *newfile,
const char *progname);
extern int fsync_parent_path(const char *fname, const char *progname);
diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h
index f2cbfb3952..6df73c946d 100644
--- a/src/include/postmaster/pgarch.h
+++ b/src/include/postmaster/pgarch.h
@@ -16,7 +16,7 @@
/* ----------
* Archiver control info.
*
- * We expect that archivable files within pg_xlog will have names between
+ * We expect that archivable files within pg_wal will have names between
* MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
* appearing in VALID_XFN_CHARS. The status files in archive_status have
* corresponding names with ".ready" or ".done" appended.
--
cgit v1.2.3
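The pgarch.h comment above fully determines what the archiver will consider: a name whose length falls within [MIN_XFN_CHARS, MAX_XFN_CHARS] and whose characters are all drawn from VALID_XFN_CHARS. A standalone sketch of that filter follows; the three constant values are reproduced from pgarch.h as of this series and should be treated as illustrative:

    #include <stdio.h>
    #include <string.h>

    /* values as defined in pgarch.h at the time of this series */
    #define MIN_XFN_CHARS 16
    #define MAX_XFN_CHARS 40
    #define VALID_XFN_CHARS "0123456789ABCDEF.history.backup.partial"

    static int
    looks_archivable(const char *name)
    {
        size_t len = strlen(name);

        return len >= MIN_XFN_CHARS && len <= MAX_XFN_CHARS &&
               strspn(name, VALID_XFN_CHARS) == len;
    }

    int
    main(void)
    {
        printf("%d\n", looks_archivable("000000010000000000000001"));   /* 1 */
        printf("%d\n", looks_archivable("00000002.history"));           /* 1 */
        printf("%d\n", looks_archivable("postmaster.pid"));             /* 0 */
        return 0;
    }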
From 7aa2c10ac6785a2de683609b98da607e588a6d02 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 21 Oct 2016 11:01:35 -0400
Subject: Doc: wording tweak for PERL, PYTHON, TCLSH configuration variables.
Replace "Full path to ..." with "Full path name of ...". At least one
user has misinterpreted the existing wording as meaning "Directory
containing ...".
---
doc/src/sgml/installation.sgml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 883e575946..296611d425 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1429,7 +1429,7 @@ su - postgres
PERL
- Full path to the Perl interpreter. This will be used to
+ Full path name of the Perl interpreter. This will be used to
determine the dependencies for building PL/Perl.
@@ -1439,7 +1439,7 @@ su - postgres
PYTHON
- Full path to the Python interpreter. This will be used to
+ Full path name of the Python interpreter. This will be used to
determine the dependencies for building PL/Python. Also,
whether Python 2 or 3 is specified here (or otherwise
implicitly chosen) determines which variant of the PL/Python
@@ -1456,7 +1456,7 @@ su - postgres
TCLSH
- Full path to the Tcl interpreter. This will be used to
+ Full path name of the Tcl interpreter. This will be used to
determine the dependencies for building PL/Tcl, and it will
be substituted into Tcl scripts.
--
cgit v1.2.3
From eacaf6e29fd2a3047aff9738a35a8e9b05e55375 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 21 Oct 2016 19:43:06 -0400
Subject: First-draft release notes for 9.6.1.
As usual, the release notes for other branches will be made by cutting
these down, but put them up for community review first.
---
doc/src/sgml/release-9.6.sgml | 1123 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 1123 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 5c40910c72..ebdeda4445 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -1,6 +1,1129 @@
+
+ Release 9.6.1
+
+
+ Release Date
+ 2016-10-27
+
+
+
+ This release contains a variety of fixes from 9.6.0.
+ For information about new features in the 9.6 major release, see
+ .
+
+
+
+ Migration to Version 9.6.1
+
+
+ A dump/restore is not required for those running 9.6.X.
+
+
+
+ However, if your installation has been affected by the bugs described in
+ the first two changelog entries below, then after updating you may need
+ to take action to repair corrupted free space maps and/or visibility
+ maps.
+
+
+
+
+ Changes
+
+
+
+
+
+
+ Fix WAL-logging of truncation of relation free space maps and
+ visibility maps (Pavan Deolasee, Heikki Linnakangas)
+
+
+
+ It was possible for these files to not be correctly restored during
+ crash recovery, or to be written incorrectly on a standby server.
+ Bogus entries in a free space map could lead to attempts to access
+ pages that have been truncated away from the relation itself, typically
+ producing errors like could not read block XXX>:
+ read only 0 of 8192 bytes. Checksum failures in the
+ visibility map are also possible, if checksumming is enabled.
+
+
+
+ Procedures for determining whether there is a problem and repairing it
+ if so are discussed at
+ >.
+
+
+
+
+
+
+ Fix possible data corruption when pg_upgrade> rewrites
+ a relation visibility map into 9.6 format (Tom Lane)
+
+
+
+ On big-endian machines, bytes of the new visibility map were written
+ in the wrong order, leading to a completely incorrect map. On
+ Windows, the old map was read using text mode, leading to incorrect
+ results if the map happened to contain consecutive bytes that matched
+ a carriage return/line feed sequence. The latter error would almost
+ always lead to a pg_upgrade> failure due to the map
+ file appearing to be the wrong length.
+
+
+
+ If you are using a big-endian machine (many non-Intel architectures
+ are big-endian) and have used pg_upgrade> to upgrade
+ from a pre-9.6 release, you should assume that all visibility maps are
+ incorrect and need to be regenerated. It is sufficient to truncate
+ each relation's visibility map
+ with contrib/pg_visibility>'s
+ pg_truncate_visibility_map()> function.
+ For more information see
+ >.
+
+
+
+
+
+
+ Fix incorrect creation of GIN index WAL records on big-endian machines
+ (Tom Lane)
+
+
+
+ The typical symptom was unexpected GIN leaf action> errors
+ during WAL replay.
+
+
+
+
+
+
+ Fix SELECT FOR UPDATE/SHARE> to correctly lock tuples that
+ have been updated by a subsequently-aborted transaction
+ (Álvaro Herrera)
+
+
+
+ In 9.5 and later, the SELECT> would sometimes fail to
+ return such tuples at all. A failure has not been proven to occur in
+ earlier releases, but might be possible with concurrent updates.
+
+
+
+
+
+
+ Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
+
+
+
+ The recheck would always see the CTE as returning no rows, typically
+ leading to failure to update rows that were recently updated.
+
+
+
+
+
+
+ Fix deletion of speculatively inserted TOAST tuples when backing out
+ of INSERT ... ON CONFLICT> (Oskari Saarenmaa)
+
+
+
+ In the race condition where two transactions try to insert conflicting
+ tuples at about the same time, the loser would fail with
+ an attempted to delete invisible tuple> error if its
+ insertion included any TOAST'ed fields.
+
+
+
+
+
+
+ Fix use-after-free hazard in execution of aggregate functions
+ using DISTINCT> (Peter Geoghegan)
+
+
+
+ This could lead to a crash or incorrect query results.
+
+
+
+
+
+
+ Fix incorrect handling of polymorphic aggregates used as window
+ functions (Tom Lane)
+
+
+
+ The aggregate's transition function was told that its first argument
+ and result were of the aggregate's output type, rather than the
+ state type. This led to errors or crashes with
+ polymorphic transition functions.
+
+
+
+
+
+
+ Fix improper repetition of previous results from hashed aggregation in
+ a subquery (Andrew Gierth)
+
+
+
+ The test to see if we can reuse a previously-computed hash table of
+ the aggregate state values neglected the possibility of an outer query
+ reference appearing in an aggregate argument expression. A change in
+ the value of such a reference should lead to recalculating the hash
+ table, but did not.
+
+
+
+
+
+
+ Fix query-lifespan memory leak in a bulk UPDATE> on a table
+ with a PRIMARY KEY> or REPLICA IDENTITY> index
+ (Tom Lane)
+
+
+
+
+
+
+ Fix COPY> with a column name list from a table that has
+ row-level security enabled (Adam Brightwell)
+
+
+
+
+
+
+ Fix EXPLAIN> to emit valid XML when
+ is on (Markus Winand)
+
+
+
+ Previously the XML output-format option produced syntactically invalid
+ tags such as <I/O-Read-Time>>. That is now
+ rendered as <I-O-Read-Time>>.
+
+
+
+
+
+
+ Suppress printing of zeroes for unmeasured times
+ in EXPLAIN> (Maksim Milyutin)
+
+
+
+ Certain option combinations resulted in printing zero values for times
+ that actually aren't ever measured in that combination. Our general
+ policy in EXPLAIN> is not to print such fields at all, so
+ do that consistently in all cases.
+
+
+
+
+
+
+ Fix statistics update for TRUNCATE> in a prepared
+ transaction (Stas Kelvich)
+
+
+
+
+
+
+ Fix timeout length when VACUUM> is waiting for exclusive
+ table lock so that it can truncate the table (Simon Riggs)
+
+
+
+ The timeout was meant to be 50 milliseconds, but it was actually only
+ 50 microseconds, causing VACUUM> to give up on truncation
+ much more easily than intended. Set it to the intended value.
+
+
+
+
+
+
+ Fix bugs in merging inherited CHECK> constraints while
+ creating or altering a table (Tom Lane, Amit Langote)
+
+
+
+ Allow identical CHECK> constraints to be added to a parent
+ and child table in either order. Prevent merging of a valid
+ constraint from the parent table with a NOT VALID>
+ constraint on the child. Likewise, prevent merging of a NO
+ INHERIT> child constraint with an inherited constraint.
+
+
+
+
+
+
+ Show a sensible value
+ in pg_settings>.unit>
+ for min_wal_size> and max_wal_size> (Tom Lane)
+
+
+
+
+
+
+ Fix replacement of array elements in jsonb_set()>
+ (Tom Lane)
+
+
+
+ If the target was an existing JSON array element, it was deleted
+ instead of being replaced with the new value.
+
+
+
+
+
+
+ Remove artificial restrictions on the values accepted
+ by numeric_in()> and numeric_recv()>
+ (Tom Lane)
+
+
+
+ We allow numeric values up to the limit of the storage format (more
+ than 1e100000>), so it seems fairly pointless
+ that numeric_in()> rejected scientific-notation exponents
+ above 1000. Likewise, it was silly for numeric_recv()> to
+ reject more than 1000 digits in an input value.
+
+
+
+
+
+
+ Fix logical WAL decoding to work properly when a subtransaction's WAL
+ output is large enough to spill to disk (Andres Freund)
+
+
+
+
+
+
+ Fix dangling-pointer problem in logical WAL decoding (Stas Kelvich)
+
+
+
+
+
+
+ Fix buffer overread in logical WAL decoding (Tom Lane)
+
+
+
+ Logical decoding of a tuple update record read 23 bytes too many,
+ which was usually harmless but with very bad luck could result in a
+ crash.
+
+
+
+
+
+
+ Fix possible sorting error when aborting use of abbreviated keys
+ (Peter Geoghegan)
+
+
+
+ In the worst case, this could result in a corrupt btree index, which
+ would need to be rebuilt using REINDEX>. However, the
+ situation is believed to be rare.
+
+
+
+
+
+
+ Fix file descriptor leakage when truncating a temporary relation of
+ more than 1GB (Andres Freund)
+
+
+
+
+
+
+ Disallow starting a standalone backend with standby_mode>
+ turned on (Michael Paquier)
+
+
+
+ This can't do anything useful, since there will be no WAL receiver
+ process to fetch more WAL data; and it could result in misbehavior
+ in code that wasn't designed with this situation in mind.
+
+
+
+
+
+
+ Properly initialize replication slot state when recycling a
+ previously-used slot (Michael Paquier)
+
+
+
+ This failure to reset all of the fields of the slot could
+ prevent VACUUM> from removing dead tuples.
+
+
+
+
+
+
+ Round shared-memory allocation request to a multiple of the actual
+ huge page size when attempting to use huge pages on Linux (Tom Lane)
+
+
+
+ This avoids possible failures during munmap()> on systems
+ with atypical default huge page sizes. Except in crash-recovery
+ cases, there were no ill effects other than a log message.
+
+
+
+
+
+
+ Use a more random value for the dynamic shared memory control
+ segment's ID (Robert Haas, Tom Lane)
+
+
+
+ Previously, the same value would be chosen every time, because it was
+ derived from random()> but srandom()> had not
+ yet been called. While relatively harmless, this was not the intended
+ behavior.
+
+
+
+
+
+
+ On Windows, retry creation of the dynamic shared memory control
+ segment after an access-denied error (Kyotaro Horiguchi, Amit Kapila)
+
+
+
+ Windows sometimes returns ERROR_ACCESS_DENIED> rather
+ than ERROR_ALREADY_EXISTS> when there is an existing
+ segment. This led to postmaster startup failure due to believing that
+ the former was an unrecoverable error.
+
+
+
+
+
+
+ Fix PL/pgSQL> to not misbehave with parameters and
+ local variables of type int2vector> or oidvector>
+ (Tom Lane)
+
+
+
+
+
+
+ Don't try to share SSL contexts across multiple connections
+ in libpq> (Heikki Linnakangas)
+
+
+
+ This led to assorted corner-case bugs, particularly when trying to use
+ different SSL parameters for different connections.
+
+
+
+
+
+
+ Avoid corner-case memory leak in libpq> (Tom Lane)
+
+
+
+ The reported problem involved leaking an error report
+ during PQreset()>, but there might be related cases.
+
+
+
+
+
+
+ Make ecpg>'s --help> and --version>
+ options work consistently with our other executables (Haribabu Kommi)
+
+
+
+
+
+
+ Fix pgbench>'s calculation of average latency
+ (Fabien Coelho)
+
+
+
+ The calculation was incorrect when there were \sleep>
+ commands in the script, or when the test duration was specified in
+ number of transactions rather than total time.
+
+
+
+
+
+
+ In pg_upgrade>, check library loadability in name order
+ (Tom Lane)
+
+
+
+ This is a workaround to deal with cross-extension dependencies from
+ language transform modules to their base language and data type
+ modules.
+
+
+
+
+
+
+ Fix pg_upgrade> to work correctly for extensions
+ containing index access methods (Tom Lane)
+
+
+
+ To allow this, the server has been extended to support ALTER
+ EXTENSION ADD/DROP ACCESS METHOD>. That functionality should have
+ been included in the original patch to support dynamic creation of
+ access methods, but it was overlooked.
+
+
+
+
+
+
+ Improve error reporting in pg_upgrade>'s file
+ copying/linking/rewriting steps (Tom Lane, Álvaro Herrera)
+
+
+
+
+
+
+ In pg_dump>, never dump range constructor functions
+ (Tom Lane)
+
+
+
+ This oversight led to pg_upgrade> failures with
+ extensions containing range types, due to duplicate creation of the
+ constructor functions.
+
+
+
+
+
+
+ In pg_dump> with -C>,
+ suppress TABLESPACE> clause of CREATE DATABASE>
+ if --no-tablespaces> is specified (Tom Lane)
+
+
+
+
+
+
+ Fix pg_dump> to work against pre-7.4 servers
+ (Amit Langote, Tom Lane)
+
+
+
+
+
+
+ Make pg_receivexlog> work correctly
+ with --synchronous> without slots (Gabriele Bartolini)
+
+
+
+
+
+
+ Disallow specifying both --source-server>
+ and --source-target> options to pg_rewind>
+ (Michael Banck)
+
+
+
+
+
+
+ Make pg_rewind> turn off synchronous_commit>
+ in its session on the source server (Michael Banck, Michael Paquier)
+
+
+
+ This allows pg_rewind> to work even when the source
+ server is using synchronous replication that is not working for some
+ reason.
+
+
+
+
+
+
+ In pg_xlogdump>, retry opening new WAL segments when
+ using --follow> option (Magnus Hagander)
+
+
+
+ This allows for a possible delay in the server's creation of the next
+ segment.
+
+
+
+
+
+
+ Fix pg_xlogdump> to cope with a WAL file that begins
+ with a continuation record spanning more than one page (Pavan
+ Deolasee)
+
+
+
+
+
+
+ Fix contrib/pg_buffercache> to work
+ when shared_buffers> exceeds 256GB (KaiGai Kohei)
+
+
+
+
+
+
+ Fix contrib/intarray/bench/bench.pl> to print the results
+ of the EXPLAIN> it does when given the -e> option
+ (Daniel Gustafsson)
+
+
+
+
+
+
+ Fix contrib/pg_visibility> to report the correct TID for
+ a corrupt tuple that has been the subject of a rolled-back update
+ (Tom Lane)
+
+
+
+
+
+
+ Support OpenSSL 1.1.0 (Heikki Linnakangas)
+
+
+
+
+
+
+ Fix makefile dependencies so that parallel make
+ of PL/Python> by itself will succeed reliably
+ (Pavel Raiskup)
+
+
+
+
+
+
+ Install TAP test infrastructure so that it's available for extension
+ testing (Craig Ringer)
+
+
+
+ When PostgreSQL> has been configured
+ with --enable-tap-tests>, make install> will now
+ install the Perl support files for TAP testing where PGXS can find
+ them. This allows non-core extensions to
+ use $(prove_check)> without extra tests.
+
+
+
+
+
+
+ In MSVC builds, include pg_recvlogical> in a
+ client-only installation (MauMau)
+
+
+
+
+
+
+ Update Windows time zone mapping to recognize some time zone names
+ added in recent Windows versions (Michael Paquier)
+
+
+
+
+
+
+ Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
+
+
+
+ If a dynamic time zone abbreviation does not match any entry in the
+ referenced time zone, treat it as equivalent to the time zone name.
+ This avoids unexpected failures when IANA removes abbreviations from
+ their time zone database, as they did in tzdata>
+ release 2016f and seem likely to do again in the future. The
+ consequences were not limited to not recognizing the individual
+ abbreviation; any mismatch caused
+ the pg_timezone_abbrevs> view to fail altogether.
+
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016h
+ for DST law changes in Palestine and Turkey, plus historical
+ corrections for Turkey and some regions of Russia.
+ Switch to numeric abbreviations for some time zones in Antarctica,
+ the former Soviet Union, and Sri Lanka.
+
+
+
+ The IANA time zone database previously provided textual abbreviations
+ for all time zones, sometimes making up abbreviations that have little
+ or no currency among the local population. They are in the process of
+ reversing that policy in favor of using numeric UTC offsets in zones
+ where there is no evidence of real-world use of an English
+ abbreviation. At least for the time being, PostgreSQL>
+ will continue to accept such removed abbreviations for timestamp input.
+ But they will not be shown in the pg_timezone_names>
+ view nor used for output.
+
+
+
+ In this update, AMT> is no longer shown as being in use to
+ mean Armenia Time. Therefore, we have changed the Default>
+ abbreviation set to interpret it as Amazon Time, thus UTC-4 not UTC+4.
+
+
+
+
+
+
+
+
Release 9.6
--
cgit v1.2.3
From 1885c88459698251eca64f095d9942c540ba0fa8 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 22 Oct 2016 14:04:51 -0400
Subject: Improve documentation about use of Linux huge pages.
Show how to get the system's huge page size, rather than misleadingly
referring to PAGE_SIZE (which is usually understood to be the regular
page size). Show how to confirm whether huge pages have been allocated.
Minor wordsmithing. Back-patch to 9.4 where this section appeared.
---
doc/src/sgml/runtime.sgml | 62 +++++++++++++++++++++++++++++------------------
1 file changed, 38 insertions(+), 24 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 6ae62b4d2b..787cfce987 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1381,53 +1381,67 @@ export PG_OOM_ADJUST_VALUE=0
- Linux huge pages
+ Linux Huge Pages
Using huge pages reduces overhead when using large contiguous chunks of
- memory, like PostgreSQL does. To enable this
+ memory, as PostgreSQL does, particularly when
+ using large values of . To use this
feature in PostgreSQL you need a kernel
with CONFIG_HUGETLBFS=y and
- CONFIG_HUGETLB_PAGE=y. You also have to tune the system
- setting vm.nr_hugepages. To estimate the number of
- necessary huge pages start PostgreSQL without
- huge pages enabled and check the VmPeak value from the
- proc file system:
+ CONFIG_HUGETLB_PAGE=y. You will also have to adjust
+ the kernel setting vm.nr_hugepages. To estimate the
+ number of huge pages needed, start PostgreSQL
+ without huge pages enabled and check the
+ postmaster's VmPeak value, as well as the system's
+ huge page size, using the /proc> file system. This might
+ look like:
-$ head -1 /path/to/data/directory/postmaster.pid
+$ head -1 $PGDATA/postmaster.pid
4170
$ grep ^VmPeak /proc/4170/status
VmPeak: 6490428 kB
+$ grep ^Hugepagesize /proc/meminfo
+Hugepagesize: 2048 kB
- 6490428 / 2048
- (PAGE_SIZE is 2MB in this case) are
- roughly 3169.154 huge pages, so you will need at
- least 3170 huge pages:
+ 6490428 / 2048 gives approximately
+ 3169.154, so in this example we need at
+ least 3170 huge pages, which we can set with:
$ sysctl -w vm.nr_hugepages=3170
+ A larger setting would be appropriate if other programs on the machine
+ also need huge pages. Don't forget to add this setting
+ to /etc/sysctl.conf so that it will be reapplied
+ after reboots.
+
+
+
Sometimes the kernel is not able to allocate the desired number of huge
- pages, so it might be necessary to repeat that command or to reboot. Don't
- forget to add an entry to /etc/sysctl.conf to persist
- this setting through reboots.
+ pages immediately, so it might be necessary to repeat the command or to
+ reboot. (Immediately after a reboot, most of the machine's memory
+ should be available to convert into huge pages.) To verify the huge
+ page allocation situation, use:
+
+$ grep Huge /proc/meminfo
+
- It is also necessary to give the database server operating system
+ It may also be necessary to give the database server's operating system
user permission to use huge pages by setting
- vm.hugetlb_shm_group> via sysctl>, and
- permission to lock memory with ulimit -l>.
+ vm.hugetlb_shm_group> via sysctl>, and/or
+ give permission to lock memory with ulimit -l>.
The default behavior for huge pages in
PostgreSQL is to use them when possible and
- to fallback to normal pages when failing. To enforce the use of huge
- pages, you can set
- huge_pages
- to on. Note that in this case
- PostgreSQL will fail to start if not enough huge
- pages are available.
+ to fall back to normal pages when failing. To enforce the use of huge
+ pages, you can set
+ to on in postgresql.conf>.
+ Note that with this setting PostgreSQL> will fail to
+ start if not enough huge pages are available.
--
cgit v1.2.3
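The nr_hugepages estimate in the revised section above is just a ceiling division of VmPeak by the huge page size, both in kB. Checking the worked example (numbers taken from the sample output in the patch):

    #include <stdio.h>

    int
    main(void)
    {
        long vmpeak_kb = 6490428;   /* grep ^VmPeak /proc/<pid>/status */
        long hugepage_kb = 2048;    /* grep ^Hugepagesize /proc/meminfo */
        long pages = (vmpeak_kb + hugepage_kb - 1) / hugepage_kb;

        printf("vm.nr_hugepages = %ld\n", pages);   /* prints 3170 */
        return 0;
    }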
From 56c7d8d4552180fd66fe48423bb2a9bb767c2d87 Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Sun, 23 Oct 2016 15:16:31 +0200
Subject: Allow pg_basebackup to stream transaction log in tar mode
This will write the received transaction log into a file called
pg_wal.tar(.gz) next to the other tarfiles instead of writing it to
base.tar. When using fetch mode, the transaction log is still written to
base.tar like before, and when used against a pre-10 server, the file
is named pg_xlog.tar.
To do this, implement a new concept of a "walmethod", which is
responsible for writing the WAL. Two implementations exist, one that
writes to a plain directory (which is also used by pg_receivexlog) and
one that writes to a tar file with optional compression.
Reviewed by Michael Paquier
---
doc/src/sgml/ref/pg_basebackup.sgml | 18 +-
src/bin/pg_basebackup/Makefile | 2 +-
src/bin/pg_basebackup/pg_basebackup.c | 62 +-
src/bin/pg_basebackup/pg_receivexlog.c | 10 +-
src/bin/pg_basebackup/receivelog.c | 316 ++++------
src/bin/pg_basebackup/receivelog.h | 3 +-
src/bin/pg_basebackup/t/010_pg_basebackup.pl | 6 +-
src/bin/pg_basebackup/walmethods.c | 886 +++++++++++++++++++++++++++
src/bin/pg_basebackup/walmethods.h | 45 ++
src/include/pgtar.h | 1 +
src/port/tar.c | 2 +-
11 files changed, 1107 insertions(+), 244 deletions(-)
create mode 100644 src/bin/pg_basebackup/walmethods.c
create mode 100644 src/bin/pg_basebackup/walmethods.h
(limited to 'doc/src')
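Before the diffs, the shape of the new abstraction is worth sketching: a walmethod is a table of function pointers chosen once at startup (directory-backed for plain mode and pg_receivexlog, tar-backed otherwise), and receivelog.c then calls through it without knowing which one it got. The stand-in below is a deliberately simplified, hypothetical illustration of that dispatch; the real WalWriteMethod in walmethods.h carries more operations (existsfile, get_file_size, get_current_pos, fsync, getlasterror, and a close that takes a rename/unlink mode):

    #include <stdio.h>

    typedef struct WalSink
    {
        void *(*open_for_write)(const char *name);
        int (*close)(void *f);
        int (*finish)(void);    /* e.g. writes the tar trailer in tar mode */
    } WalSink;

    /* directory-backed implementation: one ordinary file per segment */
    static void *dir_open(const char *name) { return fopen(name, "wb"); }
    static int dir_close(void *f) { return fclose((FILE *) f); }
    static int dir_finish(void) { return 0; }

    static const WalSink dir_sink = {dir_open, dir_close, dir_finish};

    int
    main(void)
    {
        const WalSink *sink = &dir_sink;    /* caller picks dir vs tar here */
        void *f = sink->open_for_write("000000010000000000000001");

        if (f != NULL)
            sink->close(f);
        sink->finish();
        return 0;
    }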
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 7cb690dded..e66a7ae8ee 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -180,7 +180,8 @@ PostgreSQL documentation
target directory, the tar contents will be written to
standard output, suitable for piping to for example
gzip. This is only possible if
- the cluster has no additional tablespaces.
+ the cluster has no additional tablespaces and transaction
+ log streaming is not used.
@@ -323,6 +324,10 @@ PostgreSQL documentation
If the log has been rotated when it's time to transfer it, the
backup will fail and be unusable.
+
+ The transaction log files will be written to
+ the base.tar file.
+
@@ -339,6 +344,11 @@ PostgreSQL documentation
client can keep up with transaction log received, using this mode
requires no extra transaction logs to be saved on the master.
+
+ The transaction log files are written to a separate file
+ named pg_wal.tar (if the server is a version
+ earlier than 10, the file will be named pg_xlog.tar).
+
@@ -353,7 +363,8 @@ PostgreSQL documentation
Enables gzip compression of tar file output, with the default
compression level. Compression is only available when using
- the tar format.
+ the tar format, and the suffix .gz will
+ automatically be added to all tar filenames.
@@ -366,7 +377,8 @@ PostgreSQL documentation
Enables gzip compression of tar file output, and specifies the
compression level (0 through 9, 0 being no compression and 9 being best
compression). Compression is only available when using the tar
- format.
+ format, and the suffix .gz will
+ automatically be added to all tar filenames.
diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile
index fa1ce8b24d..52ac9e9fb8 100644
--- a/src/bin/pg_basebackup/Makefile
+++ b/src/bin/pg_basebackup/Makefile
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
LDFLAGS += -L$(top_builddir)/src/fe_utils -lpgfeutils -lpq
-OBJS=receivelog.o streamutil.o $(WIN32RES)
+OBJS=receivelog.o streamutil.o walmethods.o $(WIN32RES)
all: pg_basebackup pg_receivexlog pg_recvlogical
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index b82b8e1b26..16cab978d0 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -449,7 +449,7 @@ typedef struct
{
PGconn *bgconn;
XLogRecPtr startptr;
- char xlogdir[MAXPGPATH];
+ char xlog[MAXPGPATH]; /* directory or tarfile depending on mode */
char *sysidentifier;
int timeline;
} logstreamer_param;
@@ -470,9 +470,13 @@ LogStreamerMain(logstreamer_param *param)
stream.synchronous = false;
stream.do_sync = do_sync;
stream.mark_done = true;
- stream.basedir = param->xlogdir;
stream.partial_suffix = NULL;
+ if (format == 'p')
+ stream.walmethod = CreateWalDirectoryMethod(param->xlog, do_sync);
+ else
+ stream.walmethod = CreateWalTarMethod(param->xlog, compresslevel, do_sync);
+
if (!ReceiveXlogStream(param->bgconn, &stream))
/*
@@ -482,6 +486,14 @@ LogStreamerMain(logstreamer_param *param)
*/
return 1;
+ if (!stream.walmethod->finish())
+ {
+ fprintf(stderr,
+ _("%s: could not finish writing WAL files: %s\n"),
+ progname, strerror(errno));
+ return 1;
+ }
+
PQfinish(param->bgconn);
return 0;
}
@@ -533,28 +545,32 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
exit(1);
/* In post-10 clusters, pg_xlog has been renamed to pg_wal */
- snprintf(param->xlogdir, sizeof(param->xlogdir), "%s/%s",
+ snprintf(param->xlog, sizeof(param->xlog), "%s/%s",
basedir,
PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
"pg_xlog" : "pg_wal");
- /*
- * Create pg_wal/archive_status or pg_xlog/archive_status (and thus
- * pg_wal or pg_xlog) depending on the target server so we can write to
- * basedir/pg_wal or basedir/pg_xlog as the directory entry in the tar
- * file may arrive later.
- */
- snprintf(statusdir, sizeof(statusdir), "%s/%s/archive_status",
- basedir,
- PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
- "pg_xlog" : "pg_wal");
- if (pg_mkdir_p(statusdir, S_IRWXU) != 0 && errno != EEXIST)
+ if (format == 'p')
{
- fprintf(stderr,
- _("%s: could not create directory \"%s\": %s\n"),
- progname, statusdir, strerror(errno));
- disconnect_and_exit(1);
+ /*
+ * Create pg_wal/archive_status or pg_xlog/archive_status (and thus
+ * pg_wal or pg_xlog) depending on the target server so we can write to
+ * basedir/pg_wal or basedir/pg_xlog as the directory entry in the tar
+ * file may arrive later.
+ */
+ snprintf(statusdir, sizeof(statusdir), "%s/%s/archive_status",
+ basedir,
+ PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
+ "pg_xlog" : "pg_wal");
+
+ if (pg_mkdir_p(statusdir, S_IRWXU) != 0 && errno != EEXIST)
+ {
+ fprintf(stderr,
+ _("%s: could not create directory \"%s\": %s\n"),
+ progname, statusdir, strerror(errno));
+ disconnect_and_exit(1);
+ }
}
/*
@@ -2245,16 +2261,6 @@ main(int argc, char **argv)
exit(1);
}
- if (format != 'p' && streamwal)
- {
- fprintf(stderr,
- _("%s: WAL streaming can only be used in plain mode\n"),
- progname);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
if (replication_slot && !streamwal)
{
fprintf(stderr,
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
index a58a251a59..bbdf96edfd 100644
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ b/src/bin/pg_basebackup/pg_receivexlog.c
@@ -338,11 +338,19 @@ StreamLog(void)
stream.synchronous = synchronous;
stream.do_sync = true;
stream.mark_done = false;
- stream.basedir = basedir;
+ stream.walmethod = CreateWalDirectoryMethod(basedir, stream.do_sync);
stream.partial_suffix = ".partial";
ReceiveXlogStream(conn, &stream);
+ if (!stream.walmethod->finish())
+ {
+ fprintf(stderr,
+ _("%s: could not finish writing WAL files: %s\n"),
+ progname, strerror(errno));
+ return;
+ }
+
PQfinish(conn);
conn = NULL;
}
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c
index b0fa916b44..fcd0269473 100644
--- a/src/bin/pg_basebackup/receivelog.c
+++ b/src/bin/pg_basebackup/receivelog.c
@@ -30,7 +30,7 @@
/* fd and filename for currently open WAL file */
-static int walfile = -1;
+static Walfile *walfile = NULL;
static char current_walfile_name[MAXPGPATH] = "";
static bool reportFlushPosition = false;
static XLogRecPtr lastFlushPosition = InvalidXLogRecPtr;
@@ -56,29 +56,23 @@ static bool ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos,
uint32 *timeline);
static bool
-mark_file_as_archived(const char *basedir, const char *fname, bool do_sync)
+mark_file_as_archived(StreamCtl *stream, const char *fname)
{
- int fd;
+ Walfile *f;
static char tmppath[MAXPGPATH];
- snprintf(tmppath, sizeof(tmppath), "%s/archive_status/%s.done",
- basedir, fname);
+ snprintf(tmppath, sizeof(tmppath), "archive_status/%s.done",
+ fname);
- fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
- if (fd < 0)
+ f = stream->walmethod->open_for_write(tmppath, NULL, 0);
+ if (f == NULL)
{
fprintf(stderr, _("%s: could not create archive status file \"%s\": %s\n"),
- progname, tmppath, strerror(errno));
+ progname, tmppath, stream->walmethod->getlasterror());
return false;
}
- close(fd);
-
- if (do_sync && fsync_fname(tmppath, false, progname) != 0)
- return false;
-
- if (do_sync && fsync_parent_path(tmppath, progname) != 0)
- return false;
+ stream->walmethod->close(f, CLOSE_NORMAL);
return true;
}
@@ -95,121 +89,82 @@ mark_file_as_archived(const char *basedir, const char *fname, bool do_sync)
static bool
open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
{
- int f;
+ Walfile *f;
char fn[MAXPGPATH];
- struct stat statbuf;
- char *zerobuf;
- int bytes;
+ ssize_t size;
XLogSegNo segno;
XLByteToSeg(startpoint, segno);
XLogFileName(current_walfile_name, stream->timeline, segno);
- snprintf(fn, sizeof(fn), "%s/%s%s", stream->basedir, current_walfile_name,
+ snprintf(fn, sizeof(fn), "%s%s", current_walfile_name,
stream->partial_suffix ? stream->partial_suffix : "");
- f = open(fn, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
- if (f == -1)
- {
- fprintf(stderr,
- _("%s: could not open transaction log file \"%s\": %s\n"),
- progname, fn, strerror(errno));
- return false;
- }
/*
- * Verify that the file is either empty (just created), or a complete
- * XLogSegSize segment. Anything in between indicates a corrupt file.
+ * When streaming to files, if the file already exists we verify that it's
+ * either empty (just created), or a complete XLogSegSize segment (in
+ * which case it has been created and padded). Anything else indicates a
+ * corrupt file.
+ *
+ * When streaming to tar, no file with this name can exist beforehand, so
+ * we never have to verify a size.
*/
- if (fstat(f, &statbuf) != 0)
+ if (stream->walmethod->existsfile(fn))
{
- fprintf(stderr,
- _("%s: could not stat transaction log file \"%s\": %s\n"),
- progname, fn, strerror(errno));
- close(f);
- return false;
- }
- if (statbuf.st_size == XLogSegSize)
- {
- /*
- * fsync, in case of a previous crash between padding and fsyncing the
- * file.
- */
- if (stream->do_sync)
+ size = stream->walmethod->get_file_size(fn);
+ if (size < 0)
{
- if (fsync_fname(fn, false, progname) != 0 ||
- fsync_parent_path(fn, progname) != 0)
+ fprintf(stderr,
+ _("%s: could not get size of transaction log file \"%s\": %s\n"),
+ progname, fn, stream->walmethod->getlasterror());
+ return false;
+ }
+ if (size == XLogSegSize)
+ {
+ /* Already padded file. Open it for use */
+ f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, 0);
+ if (f == NULL)
{
- /* error already printed */
- close(f);
+ fprintf(stderr,
+ _("%s: could not open existing transaction log file \"%s\": %s\n"),
+ progname, fn, stream->walmethod->getlasterror());
return false;
}
- }
- /* File is open and ready to use */
- walfile = f;
- return true;
- }
- if (statbuf.st_size != 0)
- {
- fprintf(stderr,
- _("%s: transaction log file \"%s\" has %d bytes, should be 0 or %d\n"),
- progname, fn, (int) statbuf.st_size, XLogSegSize);
- close(f);
- return false;
- }
+ /* fsync file in case of a previous crash */
+ if (!stream->walmethod->fsync(f))
+ {
+ stream->walmethod->close(f, CLOSE_UNLINK);
+ return false;
+ }
- /*
- * New, empty, file. So pad it to 16Mb with zeroes. If we fail partway
- * through padding, we should attempt to unlink the file on failure, so as
- * not to leave behind a partially-filled file.
- */
- zerobuf = pg_malloc0(XLOG_BLCKSZ);
- for (bytes = 0; bytes < XLogSegSize; bytes += XLOG_BLCKSZ)
- {
- errno = 0;
- if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+ walfile = f;
+ return true;
+ }
+ if (size != 0)
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
fprintf(stderr,
- _("%s: could not pad transaction log file \"%s\": %s\n"),
- progname, fn, strerror(errno));
- free(zerobuf);
- close(f);
- unlink(fn);
+ _("%s: transaction log file \"%s\" has %d bytes, should be 0 or %d\n"),
+ progname, fn, (int) size, XLogSegSize);
return false;
}
+ /* File existed and was empty, so fall through and open */
}
- free(zerobuf);
- /*
- * fsync WAL file and containing directory, to ensure the file is
- * persistently created and zeroed. That's particularly important when
- * using synchronous mode, where the file is modified and fsynced
- * in-place, without a directory fsync.
- */
- if (stream->do_sync)
- {
- if (fsync_fname(fn, false, progname) != 0 ||
- fsync_parent_path(fn, progname) != 0)
- {
- /* error already printed */
- close(f);
- return false;
- }
- }
+ /* No file existed, so create one */
- if (lseek(f, SEEK_SET, 0) != 0)
+ f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, XLogSegSize);
+ if (f == NULL)
{
fprintf(stderr,
- _("%s: could not seek to beginning of transaction log file \"%s\": %s\n"),
- progname, fn, strerror(errno));
- close(f);
+ _("%s: could not open transaction log file \"%s\": %s\n"),
+ progname, fn, stream->walmethod->getlasterror());
return false;
}
- /* File is open and ready to use */
walfile = f;
return true;
}
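The validity rule this hunk routes through existsfile()/get_file_size() is unchanged from the old fstat() code: a pre-existing segment file must be either empty (just created) or exactly one padded segment long; anything else is treated as corrupt. A standalone sketch of that test, assuming the default 16MB segment size:

    #include <stdio.h>
    #include <sys/stat.h>

    #define XLOG_SEG_SIZE (16 * 1024 * 1024)    /* default WAL segment size */

    static int
    segment_size_ok(const char *path)
    {
        struct stat st;

        if (stat(path, &st) != 0)
            return 1;   /* no file yet: it will be created and padded */
        return st.st_size == 0 || st.st_size == XLOG_SEG_SIZE;
    }

    int
    main(int argc, char **argv)
    {
        if (argc > 1)
            printf("%s: %s\n", argv[1],
                   segment_size_ok(argv[1]) ? "ok" : "corrupt");
        return 0;
    }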
@@ -223,59 +178,46 @@ static bool
close_walfile(StreamCtl *stream, XLogRecPtr pos)
{
off_t currpos;
+ int r;
- if (walfile == -1)
+ if (walfile == NULL)
return true;
- currpos = lseek(walfile, 0, SEEK_CUR);
+ currpos = stream->walmethod->get_current_pos(walfile);
if (currpos == -1)
{
fprintf(stderr,
_("%s: could not determine seek position in file \"%s\": %s\n"),
- progname, current_walfile_name, strerror(errno));
- close(walfile);
- walfile = -1;
+ progname, current_walfile_name, stream->walmethod->getlasterror());
+ stream->walmethod->close(walfile, CLOSE_UNLINK);
+ walfile = NULL;
+
return false;
}
- if (stream->do_sync && fsync(walfile) != 0)
+ if (stream->partial_suffix)
{
- fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
- progname, current_walfile_name, strerror(errno));
- close(walfile);
- walfile = -1;
- return false;
+ if (currpos == XLOG_SEG_SIZE)
+ r = stream->walmethod->close(walfile, CLOSE_NORMAL);
+ else
+ {
+ fprintf(stderr,
+ _("%s: not renaming \"%s%s\", segment is not complete\n"),
+ progname, current_walfile_name, stream->partial_suffix);
+ r = stream->walmethod->close(walfile, CLOSE_NO_RENAME);
+ }
}
+ else
+ r = stream->walmethod->close(walfile, CLOSE_NORMAL);
- if (close(walfile) != 0)
+ walfile = NULL;
+
+ if (r != 0)
{
fprintf(stderr, _("%s: could not close file \"%s\": %s\n"),
- progname, current_walfile_name, strerror(errno));
- walfile = -1;
+ progname, current_walfile_name, stream->walmethod->getlasterror());
return false;
}
- walfile = -1;
-
- /*
- * If we finished writing a .partial file, rename it into place.
- */
- if (currpos == XLOG_SEG_SIZE && stream->partial_suffix)
- {
- char oldfn[MAXPGPATH];
- char newfn[MAXPGPATH];
-
- snprintf(oldfn, sizeof(oldfn), "%s/%s%s", stream->basedir, current_walfile_name, stream->partial_suffix);
- snprintf(newfn, sizeof(newfn), "%s/%s", stream->basedir, current_walfile_name);
- if (durable_rename(oldfn, newfn, progname) != 0)
- {
- /* durable_rename produced a log entry */
- return false;
- }
- }
- else if (stream->partial_suffix)
- fprintf(stderr,
- _("%s: not renaming \"%s%s\", segment is not complete\n"),
- progname, current_walfile_name, stream->partial_suffix);
/*
* Mark file as archived if requested by the caller - pg_basebackup needs
@@ -286,8 +228,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos)
if (currpos == XLOG_SEG_SIZE && stream->mark_done)
{
/* writes error message if failed */
- if (!mark_file_as_archived(stream->basedir, current_walfile_name,
- stream->do_sync))
+ if (!mark_file_as_archived(stream, current_walfile_name))
return false;
}
@@ -302,9 +243,7 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos)
static bool
existsTimeLineHistoryFile(StreamCtl *stream)
{
- char path[MAXPGPATH];
char histfname[MAXFNAMELEN];
- int fd;
/*
* Timeline 1 never has a history file. We treat that as if it existed,
@@ -315,31 +254,15 @@ existsTimeLineHistoryFile(StreamCtl *stream)
TLHistoryFileName(histfname, stream->timeline);
- snprintf(path, sizeof(path), "%s/%s", stream->basedir, histfname);
-
- fd = open(path, O_RDONLY | PG_BINARY, 0);
- if (fd < 0)
- {
- if (errno != ENOENT)
- fprintf(stderr, _("%s: could not open timeline history file \"%s\": %s\n"),
- progname, path, strerror(errno));
- return false;
- }
- else
- {
- close(fd);
- return true;
- }
+ return stream->walmethod->existsfile(histfname);
}
static bool
writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
{
int size = strlen(content);
- char path[MAXPGPATH];
- char tmppath[MAXPGPATH];
char histfname[MAXFNAMELEN];
- int fd;
+ Walfile *f;
/*
* Check that the server's idea of how timeline history files should be
@@ -353,53 +276,31 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
return false;
}
- snprintf(path, sizeof(path), "%s/%s", stream->basedir, histfname);
-
- /*
- * Write into a temp file name.
- */
- snprintf(tmppath, MAXPGPATH, "%s.tmp", path);
-
- unlink(tmppath);
-
- fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
- if (fd < 0)
+ f = stream->walmethod->open_for_write(histfname, ".tmp", 0);
+ if (f == NULL)
{
fprintf(stderr, _("%s: could not create timeline history file \"%s\": %s\n"),
- progname, tmppath, strerror(errno));
+ progname, histfname, stream->walmethod->getlasterror());
return false;
}
- errno = 0;
- if ((int) write(fd, content, size) != size)
+ if ((int) stream->walmethod->write(f, content, size) != size)
{
- int save_errno = errno;
+ fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
+ progname, histfname, stream->walmethod->getlasterror());
/*
* If we fail to make the file, delete it to release disk space
*/
- close(fd);
- unlink(tmppath);
- errno = save_errno;
+ stream->walmethod->close(f, CLOSE_UNLINK);
- fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
- progname, tmppath, strerror(errno));
return false;
}
- if (close(fd) != 0)
+ if (stream->walmethod->close(f, CLOSE_NORMAL) != 0)
{
fprintf(stderr, _("%s: could not close file \"%s\": %s\n"),
- progname, tmppath, strerror(errno));
- return false;
- }
-
- /*
- * Now move the completed history file into place with its final name.
- */
- if (durable_rename(tmppath, path, progname) < 0)
- {
- /* durable_rename produced a log entry */
+ progname, histfname, stream->walmethod->getlasterror());
return false;
}
@@ -407,8 +308,7 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content)
if (stream->mark_done)
{
/* writes error message if failed */
- if (!mark_file_as_archived(stream->basedir, histfname,
- stream->do_sync))
+ if (!mark_file_as_archived(stream, histfname))
return false;
}
@@ -618,7 +518,9 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
{
/*
* Fetch the timeline history file for this timeline, if we don't have
- * it already.
+ * it already. When streaming log to tar, this will always return
+ * false, as we are never streaming into an existing file and
+ * therefore there can be no pre-existing timeline history file.
*/
if (!existsTimeLineHistoryFile(stream))
{
@@ -777,10 +679,10 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
}
error:
- if (walfile != -1 && close(walfile) != 0)
+ if (walfile != NULL && stream->walmethod->close(walfile, CLOSE_NORMAL) != 0)
fprintf(stderr, _("%s: could not close file \"%s\": %s\n"),
- progname, current_walfile_name, strerror(errno));
- walfile = -1;
+ progname, current_walfile_name, stream->walmethod->getlasterror());
+ walfile = NULL;
return false;
}
@@ -864,12 +766,12 @@ HandleCopyStream(PGconn *conn, StreamCtl *stream,
* If synchronous option is true, issue sync command as soon as there
* is WAL data which has not been flushed yet.
*/
- if (stream->synchronous && lastFlushPosition < blockpos && walfile != -1)
+ if (stream->synchronous && lastFlushPosition < blockpos && walfile != NULL)
{
- if (stream->do_sync && fsync(walfile) != 0)
+ if (stream->walmethod->fsync(walfile) != 0)
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
- progname, current_walfile_name, strerror(errno));
+ progname, current_walfile_name, stream->walmethod->getlasterror());
goto error;
}
lastFlushPosition = blockpos;
@@ -1100,7 +1002,7 @@ ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
if (replyRequested && still_sending)
{
if (reportFlushPosition && lastFlushPosition < blockpos &&
- walfile != -1)
+ walfile != NULL)
{
/*
* If a valid flush location needs to be reported, flush the
@@ -1109,10 +1011,10 @@ ProcessKeepaliveMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
* data has been successfully replicated or not, at the normal
* shutdown of the server.
*/
- if (stream->do_sync && fsync(walfile) != 0)
+ if (stream->walmethod->fsync(walfile) != 0)
{
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
- progname, current_walfile_name, strerror(errno));
+ progname, current_walfile_name, stream->walmethod->getlasterror());
return false;
}
lastFlushPosition = blockpos;
@@ -1170,7 +1072,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
* Verify that the initial location in the stream matches where we think
* we are.
*/
- if (walfile == -1)
+ if (walfile == NULL)
{
/* No file open yet */
if (xlogoff != 0)
@@ -1184,12 +1086,11 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
else
{
/* More data in existing segment */
- /* XXX: store seek value don't reseek all the time */
- if (lseek(walfile, 0, SEEK_CUR) != xlogoff)
+ if (stream->walmethod->get_current_pos(walfile) != xlogoff)
{
fprintf(stderr,
_("%s: got WAL data offset %08x, expected %08x\n"),
- progname, xlogoff, (int) lseek(walfile, 0, SEEK_CUR));
+ progname, xlogoff, (int) stream->walmethod->get_current_pos(walfile));
return false;
}
}
@@ -1210,7 +1111,7 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
else
bytes_to_write = bytes_left;
- if (walfile == -1)
+ if (walfile == NULL)
{
if (!open_walfile(stream, *blockpos))
{
@@ -1219,14 +1120,13 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
}
}
- if (write(walfile,
- copybuf + hdr_len + bytes_written,
- bytes_to_write) != bytes_to_write)
+ if (stream->walmethod->write(walfile, copybuf + hdr_len + bytes_written,
+ bytes_to_write) != bytes_to_write)
{
fprintf(stderr,
_("%s: could not write %u bytes to WAL file \"%s\": %s\n"),
progname, bytes_to_write, current_walfile_name,
- strerror(errno));
+ stream->walmethod->getlasterror());
return false;
}
diff --git a/src/bin/pg_basebackup/receivelog.h b/src/bin/pg_basebackup/receivelog.h
index 7a3bbc5080..b5913ea995 100644
--- a/src/bin/pg_basebackup/receivelog.h
+++ b/src/bin/pg_basebackup/receivelog.h
@@ -13,6 +13,7 @@
#define RECEIVELOG_H
#include "libpq-fe.h"
+#include "walmethods.h"
#include "access/xlogdefs.h"
@@ -41,7 +42,7 @@ typedef struct StreamCtl
stream_stop_callback stream_stop; /* Stop streaming when returns true */
- char *basedir; /* Received segments written to this dir */
+ WalWriteMethod *walmethod; /* How to write the WAL */
char *partial_suffix; /* Suffix appended to partially received files */
} StreamCtl;
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index 579d7a15fb..91eb84e238 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
-use Test::More tests => 67;
+use Test::More tests => 69;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@@ -237,6 +237,10 @@ $node->command_ok(
'pg_basebackup -X stream runs');
ok(grep(/^[0-9A-F]{24}$/, slurp_dir("$tempdir/backupxf/pg_wal")),
'WAL files copied');
+$node->command_ok(
+ [ 'pg_basebackup', '-D', "$tempdir/backupxst", '-X', 'stream', '-Ft' ],
+ 'pg_basebackup -X stream runs in tar mode');
+ok(-f "$tempdir/backupxst/pg_wal.tar", "tar file was created");
$node->command_fails(
[ 'pg_basebackup', '-D', "$tempdir/fail", '-S', 'slot1' ],
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
new file mode 100644
index 0000000000..e0ec752bbd
--- /dev/null
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -0,0 +1,886 @@
+/*-------------------------------------------------------------------------
+ *
+ * walmethods.c - implementations of different ways to write received wal
+ *
+ * NOTE! The caller must ensure that only one method is instantiated in
+ * any given program, and that it's only instantiated once!
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/bin/pg_basebackup/walmethods.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
+#ifdef HAVE_LIBZ
+#include <zlib.h>
+#endif
+
+#include "pgtar.h"
+#include "common/file_utils.h"
+
+#include "receivelog.h"
+#include "streamutil.h"
+
+/* Size of zlib buffer for .tar.gz */
+#define ZLIB_OUT_SIZE 4096
+
+/*-------------------------------------------------------------------------
+ * WalDirectoryMethod - write wal to a directory looking like pg_xlog
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Global static data for this method
+ */
+typedef struct DirectoryMethodData
+{
+ char *basedir;
+ bool sync;
+} DirectoryMethodData;
+static DirectoryMethodData *dir_data = NULL;
+
+/*
+ * Local file handle
+ */
+typedef struct DirectoryMethodFile
+{
+ int fd;
+ off_t currpos;
+ char *pathname;
+ char *fullpath;
+ char *temp_suffix;
+} DirectoryMethodFile;
+
+static char *
+dir_getlasterror(void)
+{
+ /* Directory method always sets errno, so just use strerror */
+ return strerror(errno);
+}
+
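+/*
+ * Open a file for writing under the method's base directory, appending
+ * temp_suffix (if any) to the name, and pre-padding the file with zeroes
+ * out to pad_to_size bytes so that later in-place writes cannot run out
+ * of disk space.
+ */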
+static Walfile
+dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_size)
+{
+ static char tmppath[MAXPGPATH];
+ int fd;
+ DirectoryMethodFile *f;
+
+ snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
+ dir_data->basedir, pathname, temp_suffix ? temp_suffix : "");
+
+ fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ return NULL;
+
+ if (pad_to_size)
+ {
+ /* Always pre-pad on regular files */
+ char *zerobuf;
+ int bytes;
+
+ zerobuf = pg_malloc0(XLOG_BLCKSZ);
+ for (bytes = 0; bytes < pad_to_size; bytes += XLOG_BLCKSZ)
+ {
+ if (write(fd, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+ {
+ int save_errno = errno;
+
+ pg_free(zerobuf);
+ close(fd);
+ errno = save_errno;
+ return NULL;
+ }
+ }
+ pg_free(zerobuf);
+
+ if (lseek(fd, 0, SEEK_SET) != 0)
+ {
+ int save_errno = errno;
+
+ close(fd);
+ errno = save_errno;
+ return NULL;
+ }
+ }
+
+ /*
+ * fsync WAL file and containing directory, to ensure the file is
+ * persistently created and zeroed (if padded). That's particularly
+ * important when using synchronous mode, where the file is modified and
+ * fsynced in-place, without a directory fsync.
+ */
+ if (dir_data->sync)
+ {
+ if (fsync_fname(tmppath, false, progname) != 0 ||
+ fsync_parent_path(tmppath, progname) != 0)
+ {
+ close(fd);
+ return NULL;
+ }
+ }
+
+ f = pg_malloc0(sizeof(DirectoryMethodFile));
+ f->fd = fd;
+ f->currpos = 0;
+ f->pathname = pg_strdup(pathname);
+ f->fullpath = pg_strdup(tmppath);
+ if (temp_suffix)
+ f->temp_suffix = pg_strdup(temp_suffix);
+
+ return f;
+}
+
+static ssize_t
+dir_write(Walfile f, const void *buf, size_t count)
+{
+ ssize_t r;
+ DirectoryMethodFile *df = (DirectoryMethodFile *) f;
+
+ Assert(f != NULL);
+
+ r = write(df->fd, buf, count);
+ if (r > 0)
+ df->currpos += r;
+ return r;
+}
+
+static off_t
+dir_get_current_pos(Walfile f)
+{
+ Assert(f != NULL);
+
+ /* Use a cached value to prevent lots of reseeks */
+ return ((DirectoryMethodFile *) f)->currpos;
+}
+
+static int
+dir_close(Walfile f, WalCloseMethod method)
+{
+ int r;
+ DirectoryMethodFile *df = (DirectoryMethodFile *) f;
+ static char tmppath[MAXPGPATH];
+ static char tmppath2[MAXPGPATH];
+
+ Assert(f != NULL);
+
+ r = close(df->fd);
+
+ if (r == 0)
+ {
+ /* Build path to the current version of the file */
+ if (method == CLOSE_NORMAL && df->temp_suffix)
+ {
+ /*
+ * If we have a temp prefix, normal operation is to rename the
+ * file.
+ */
+ snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
+ dir_data->basedir, df->pathname, df->temp_suffix);
+ snprintf(tmppath2, sizeof(tmppath2), "%s/%s",
+ dir_data->basedir, df->pathname);
+ r = durable_rename(tmppath, tmppath2, progname);
+ }
+ else if (method == CLOSE_UNLINK)
+ {
+ /* Unlink the file once it's closed */
+ snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
+ dir_data->basedir, df->pathname, df->temp_suffix ? df->temp_suffix : "");
+ r = unlink(tmppath);
+ }
+ else
+ {
+ /*
+ * Else either CLOSE_NORMAL and no temp suffix, or
+ * CLOSE_NO_RENAME. In this case, fsync the file and containing
+ * directory if sync mode is requested.
+ */
+ if (dir_data->sync)
+ {
+ r = fsync_fname(df->fullpath, false, progname);
+ if (r == 0)
+ r = fsync_parent_path(df->fullpath, progname);
+ }
+ }
+ }
+
+ pg_free(df->pathname);
+ pg_free(df->fullpath);
+ if (df->temp_suffix)
+ pg_free(df->temp_suffix);
+ pg_free(df);
+
+ return r;
+}
+
+static int
+dir_fsync(Walfile f)
+{
+ Assert(f != NULL);
+
+ if (!dir_data->sync)
+ return 0;
+
+ return fsync(((DirectoryMethodFile *) f)->fd);
+}
+
+static ssize_t
+dir_get_file_size(const char *pathname)
+{
+ struct stat statbuf;
+ static char tmppath[MAXPGPATH];
+
+ snprintf(tmppath, sizeof(tmppath), "%s/%s",
+ dir_data->basedir, pathname);
+
+ if (stat(tmppath, &statbuf) != 0)
+ return -1;
+
+ return statbuf.st_size;
+}
+
+static bool
+dir_existsfile(const char *pathname)
+{
+ static char tmppath[MAXPGPATH];
+ int fd;
+
+ snprintf(tmppath, sizeof(tmppath), "%s/%s",
+ dir_data->basedir, pathname);
+
+ fd = open(tmppath, O_RDONLY | PG_BINARY, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
+static bool
+dir_finish(void)
+{
+ if (dir_data->sync)
+ {
+ /*
+ * Files are fsynced when they are closed, but we need to fsync the
+ * directory entry here as well.
+ */
+ if (fsync_fname(dir_data->basedir, true, progname) != 0)
+ return false;
+ }
+ return true;
+}
+
+
+WalWriteMethod *
+CreateWalDirectoryMethod(const char *basedir, bool sync)
+{
+ WalWriteMethod *method;
+
+ method = pg_malloc0(sizeof(WalWriteMethod));
+ method->open_for_write = dir_open_for_write;
+ method->write = dir_write;
+ method->get_current_pos = dir_get_current_pos;
+ method->get_file_size = dir_get_file_size;
+ method->close = dir_close;
+ method->fsync = dir_fsync;
+ method->existsfile = dir_existsfile;
+ method->finish = dir_finish;
+ method->getlasterror = dir_getlasterror;
+
+ dir_data = pg_malloc0(sizeof(DirectoryMethodData));
+ dir_data->basedir = pg_strdup(basedir);
+ dir_data->sync = sync;
+
+ return method;
+}
+
+
+/*-------------------------------------------------------------------------
+ * WalTarMethod - write wal to a tar file containing pg_xlog contents
+ *-------------------------------------------------------------------------
+ */
+
+typedef struct TarMethodFile
+{
+ off_t ofs_start; /* Where does the *header* for this file start */
+ off_t currpos;
+ char header[512];
+ char *pathname;
+ size_t pad_to_size;
+} TarMethodFile;
+
+typedef struct TarMethodData
+{
+ char *tarfilename;
+ int fd;
+ int compression;
+ bool sync;
+ TarMethodFile *currentfile;
+ char lasterror[1024];
+#ifdef HAVE_LIBZ
+ z_streamp zp;
+ void *zlibOut;
+#endif
+} TarMethodData;
+static TarMethodData *tar_data = NULL;
+
+#define tar_clear_error() tar_data->lasterror[0] = '\0'
+#define tar_set_error(msg) strlcpy(tar_data->lasterror, msg, sizeof(tar_data->lasterror))
+
+static char *
+tar_getlasterror(void)
+{
+ /*
+ * If a custom error is set, return that one. Otherwise, assume errno is
+ * set and return that one.
+ */
+ if (tar_data->lasterror[0])
+ return tar_data->lasterror;
+ return strerror(errno);
+}
+
+#ifdef HAVE_LIBZ
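+/*
+ * Feed "count" bytes from "buf" into the deflate stream, writing the
+ * output buffer to the tar file as it fills. With "flush" set, finish
+ * the stream (Z_FINISH) and reset it so the next caller starts fresh.
+ */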
+static bool
+tar_write_compressed_data(void *buf, size_t count, bool flush)
+{
+ tar_data->zp->next_in = buf;
+ tar_data->zp->avail_in = count;
+
+ while (tar_data->zp->avail_in || flush)
+ {
+ int r;
+
+ r = deflate(tar_data->zp, flush ? Z_FINISH : Z_NO_FLUSH);
+ if (r == Z_STREAM_ERROR)
+ {
+ tar_set_error("deflate failed");
+ return false;
+ }
+
+ if (tar_data->zp->avail_out < ZLIB_OUT_SIZE)
+ {
+ size_t len = ZLIB_OUT_SIZE - tar_data->zp->avail_out;
+
+ if (write(tar_data->fd, tar_data->zlibOut, len) != len)
+ return false;
+
+ tar_data->zp->next_out = tar_data->zlibOut;
+ tar_data->zp->avail_out = ZLIB_OUT_SIZE;
+ }
+
+ if (r == Z_STREAM_END)
+ break;
+ }
+
+ if (flush)
+ {
+ /* Reset the stream for writing */
+ if (deflateReset(tar_data->zp) != Z_OK)
+ {
+ tar_set_error("deflateReset failed");
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
+static ssize_t
+tar_write(Walfile f, const void *buf, size_t count)
+{
+ ssize_t r;
+
+ Assert(f != NULL);
+ tar_clear_error();
+
+ /* Tarfile will always be positioned at the end */
+ if (!tar_data->compression)
+ {
+ r = write(tar_data->fd, buf, count);
+ if (r > 0)
+ ((TarMethodFile *) f)->currpos += r;
+ return r;
+ }
+#ifdef HAVE_LIBZ
+ else
+ {
+ if (!tar_write_compressed_data((void *) buf, count, false))
+ return -1;
+ ((TarMethodFile *) f)->currpos += count;
+ return count;
+ }
+#else
+ else
+ {
+ /* can't happen: compression is never enabled without libz support */
+ errno = ENOSYS;
+ return -1;
+ }
+#endif
+}
+
+static bool
+tar_write_padding_data(TarMethodFile * f, size_t bytes)
+{
+ char *zerobuf = pg_malloc0(XLOG_BLCKSZ);
+ size_t bytesleft = bytes;
+
+ while (bytesleft)
+ {
+ size_t bytestowrite = bytesleft > XLOG_BLCKSZ ? XLOG_BLCKSZ : bytesleft;
+
+ ssize_t r = tar_write(f, zerobuf, bytestowrite);
+
+ if (r < 0)
+ {
+ pg_free(zerobuf);
+ return false;
+ }
+ bytesleft -= r;
+ }
+ pg_free(zerobuf);
+ return true;
+}
+
+static Walfile
+tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_size)
+{
+ int save_errno;
+ static char tmppath[MAXPGPATH];
+
+ tar_clear_error();
+
+ if (tar_data->fd < 0)
+ {
+ /*
+ * We open the tar file only when we first try to write to it.
+ */
+ tar_data->fd = open(tar_data->tarfilename,
+ O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
+ if (tar_data->fd < 0)
+ return NULL;
+
+#ifdef HAVE_LIBZ
+ if (tar_data->compression)
+ {
+ tar_data->zp = (z_streamp) pg_malloc(sizeof(z_stream));
+ tar_data->zp->zalloc = Z_NULL;
+ tar_data->zp->zfree = Z_NULL;
+ tar_data->zp->opaque = Z_NULL;
+ tar_data->zp->next_out = tar_data->zlibOut;
+ tar_data->zp->avail_out = ZLIB_OUT_SIZE;
+
+ /*
+ * Initialize deflation library. Adding the magic value 16 to the
+ * default 15 for the windowBits parameter makes the output be
+ * gzip instead of zlib.
+ */
+ if (deflateInit2(tar_data->zp, tar_data->compression, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK)
+ {
+ pg_free(tar_data->zp);
+ tar_data->zp = NULL;
+ tar_set_error("deflateInit2 failed");
+ return NULL;
+ }
+ }
+#endif
+
+ /* There's no tar header itself, the file starts with regular files */
+ }
+
+ Assert(tar_data->currentfile == NULL);
+ if (tar_data->currentfile != NULL)
+ {
+ tar_set_error("implementation error: tar files can't have more than one open file\n");
+ return NULL;
+ }
+
+ tar_data->currentfile = pg_malloc0(sizeof(TarMethodFile));
+
+ snprintf(tmppath, sizeof(tmppath), "%s%s",
+ pathname, temp_suffix ? temp_suffix : "");
+
+ /* Create a header with size set to 0 - we will fill out the size on close */
+ if (tarCreateHeader(tar_data->currentfile->header, tmppath, NULL, 0, S_IRUSR | S_IWUSR, 0, 0, time(NULL)) != TAR_OK)
+ {
+ pg_free(tar_data->currentfile);
+ tar_data->currentfile = NULL;
+ tar_set_error("could not create tar header");
+ return NULL;
+ }
+
+#ifdef HAVE_LIBZ
+ if (tar_data->compression)
+ {
+ /* Flush existing data */
+ if (!tar_write_compressed_data(NULL, 0, true))
+ return NULL;
+
+ /* Turn off compression for header */
+ if (deflateParams(tar_data->zp, 0, 0) != Z_OK)
+ {
+ tar_set_error("deflateParams failed");
+ return NULL;
+ }
+ }
+#endif
+
+ tar_data->currentfile->ofs_start = lseek(tar_data->fd, 0, SEEK_CUR);
+ if (tar_data->currentfile->ofs_start == -1)
+ {
+ save_errno = errno;
+ pg_free(tar_data->currentfile);
+ tar_data->currentfile = NULL;
+ errno = save_errno;
+ return NULL;
+ }
+ tar_data->currentfile->currpos = 0;
+
+ if (!tar_data->compression)
+ {
+ if (write(tar_data->fd, tar_data->currentfile->header, 512) != 512)
+ {
+ save_errno = errno;
+ pg_free(tar_data->currentfile);
+ tar_data->currentfile = NULL;
+ errno = save_errno;
+ return NULL;
+ }
+ }
+#ifdef HAVE_LIBZ
+ else
+ {
+ /* Write header through the zlib APIs but with no compression */
+ if (!tar_write_compressed_data(tar_data->currentfile->header, 512, true))
+ return NULL;
+
+ /* Re-enable compression for the rest of the file */
+ if (deflateParams(tar_data->zp, tar_data->compression, 0) != Z_OK)
+ {
+ tar_set_error("deflateParams failed");
+ return NULL;
+ }
+ }
+#endif
+
+ tar_data->currentfile->pathname = pg_strdup(pathname);
+
+ /*
+ * Uncompressed files are padded on creation, but for compression we can't
+ * do that
+ */
+ if (pad_to_size)
+ {
+ tar_data->currentfile->pad_to_size = pad_to_size;
+ if (!tar_data->compression)
+ {
+ /* Uncompressed, so pad now */
+ tar_write_padding_data(tar_data->currentfile, pad_to_size);
+ /* Seek back to start */
+ if (lseek(tar_data->fd, tar_data->currentfile->ofs_start + 512, SEEK_SET) != tar_data->currentfile->ofs_start + 512)
+ return NULL;
+
+ tar_data->currentfile->currpos = 0;
+ }
+ }
+
+ return tar_data->currentfile;
+}
+
+static ssize_t
+tar_get_file_size(const char *pathname)
+{
+ tar_clear_error();
+
+ /* Currently not used, so not supported */
+ errno = ENOSYS;
+ return -1;
+}
+
+static off_t
+tar_get_current_pos(Walfile f)
+{
+ Assert(f != NULL);
+ tar_clear_error();
+
+ return ((TarMethodFile *) f)->currpos;
+}
+
+static int
+tar_fsync(Walfile f)
+{
+ Assert(f != NULL);
+ tar_clear_error();
+
+ /*
+ * Always sync the whole tarfile, because that's all we can do. This makes
+ * no sense on compressed files, so just ignore those.
+ */
+ if (tar_data->compression)
+ return 0;
+
+ return fsync(tar_data->fd);
+}
+
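+/*
+ * Close the currently open tar member. CLOSE_UNLINK truncates the member
+ * away again; otherwise the member is padded out to its requested size
+ * and to a 512-byte boundary, after which its header is rewritten with
+ * the final size (and, for CLOSE_NORMAL, its final name).
+ */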
+static int
+tar_close(Walfile f, WalCloseMethod method)
+{
+ ssize_t filesize;
+ int padding;
+ TarMethodFile *tf = (TarMethodFile *) f;
+
+ Assert(f != NULL);
+ tar_clear_error();
+
+ if (method == CLOSE_UNLINK)
+ {
+ if (tar_data->compression)
+ {
+ tar_set_error("unlink not supported with compression");
+ return -1;
+ }
+
+ /*
+ * Unlink the file that we just wrote to the tar. We do this by
+ * truncating it to the start of the header. This is safe as we only
+ * allow writing of the very last file.
+ */
+ if (ftruncate(tar_data->fd, tf->ofs_start) != 0)
+ return -1;
+
+ pg_free(tf->pathname);
+ pg_free(tf);
+ tar_data->currentfile = NULL;
+
+ return 0;
+ }
+
+ /*
+ * Pad the file itself with zeroes if necessary. Note that this is
+ * different from the tar format padding -- this is the padding we asked
+ * for when the file was opened.
+ */
+ if (tf->pad_to_size)
+ {
+ if (tar_data->compression)
+ {
+ /*
+ * A compressed tarfile is padded on close since we cannot know
+ * the size of the compressed output until the end.
+ */
+ size_t sizeleft = tf->pad_to_size - tf->currpos;
+
+ if (sizeleft)
+ {
+ if (!tar_write_padding_data(tf, sizeleft))
+ return -1;
+ }
+ }
+ else
+ {
+ /*
+ * An uncompressed tarfile was padded on creation, so just adjust
+ * the current position as if we seeked to the end.
+ */
+ tf->currpos = tf->pad_to_size;
+ }
+ }
+
+ /*
+ * Get the size of the file, and pad the current data up to the nearest
+ * 512 byte boundary.
+ */
+ filesize = tar_get_current_pos(f);
+ padding = ((filesize + 511) & ~511) - filesize;
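+ /*
+ * For example, filesize = 1300 gives ((1300 + 511) & ~511) = 1536,
+ * so padding = 236 zero bytes, rounding the member up to the next
+ * 512-byte tar block.
+ */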
+ if (padding)
+ {
+ char zerobuf[512];
+
+ MemSet(zerobuf, 0, padding);
+ if (tar_write(f, zerobuf, padding) != padding)
+ return -1;
+ }
+
+
+#ifdef HAVE_LIBZ
+ if (tar_data->compression)
+ {
+ /* Flush the current buffer */
+ if (!tar_write_compressed_data(NULL, 0, true))
+ {
+ errno = EINVAL;
+ return -1;
+ }
+ }
+#endif
+
+ /*
+ * Now go back and update the header with the correct filesize and
+ * possibly also renaming the file. We overwrite the entire current header
+ * when done, including the checksum.
+ */
+ print_tar_number(&(tf->header[124]), 12, filesize);
+
+ if (method == CLOSE_NORMAL)
+
+ /*
+ * We overwrite it with what it was before if we have no tempname,
+ * since we're going to write the buffer anyway.
+ */
+ strlcpy(&(tf->header[0]), tf->pathname, 100);
+
+ print_tar_number(&(tf->header[148]), 8, tarChecksum(((TarMethodFile *) f)->header));
+ if (lseek(tar_data->fd, tf->ofs_start, SEEK_SET) != ((TarMethodFile *) f)->ofs_start)
+ return -1;
+ if (!tar_data->compression)
+ {
+ if (write(tar_data->fd, tf->header, 512) != 512)
+ return -1;
+ }
+#ifdef HAVE_LIBZ
+ else
+ {
+ /* Turn off compression */
+ if (deflateParams(tar_data->zp, 0, 0) != Z_OK)
+ {
+ tar_set_error("deflateParams failed");
+ return -1;
+ }
+
+ /* Overwrite the header, assuming the size will be the same */
+ if (!tar_write_compressed_data(tar_data->currentfile->header, 512, true))
+ return -1;
+
+ /* Turn compression back on */
+ if (deflateParams(tar_data->zp, tar_data->compression, 0) != Z_OK)
+ {
+ tar_set_error("deflateParams failed");
+ return -1;
+ }
+ }
+#endif
+
+ /* Move file pointer back down to end, so we can write the next file */
+ if (lseek(tar_data->fd, 0, SEEK_END) < 0)
+ return -1;
+
+ /* Always fsync on close, so the padding gets fsynced */
+ tar_fsync(f);
+
+ /* Clean up and done */
+ pg_free(tf->pathname);
+ pg_free(tf);
+ tar_data->currentfile = NULL;
+
+ return 0;
+}
+
+static bool
+tar_existsfile(const char *pathname)
+{
+ tar_clear_error();
+ /* We only deal with new tarfiles, so nothing externally created exists */
+ return false;
+}
+
+static bool
+tar_finish(void)
+{
+ char zerobuf[1024];
+
+ tar_clear_error();
+
+ if (tar_data->currentfile)
+ {
+ if (tar_close(tar_data->currentfile, CLOSE_NORMAL) != 0)
+ return false;
+ }
+
+ /* A tarfile always ends with two empty blocks */
+ MemSet(zerobuf, 0, sizeof(zerobuf));
+ if (!tar_data->compression)
+ {
+ if (write(tar_data->fd, zerobuf, sizeof(zerobuf)) != sizeof(zerobuf))
+ return false;
+ }
+#ifdef HAVE_LIBZ
+ else
+ {
+ if (!tar_write_compressed_data(zerobuf, sizeof(zerobuf), false))
+ return false;
+
+ /* Also flush all data to make sure the gzip stream is finished */
+ tar_data->zp->next_in = NULL;
+ tar_data->zp->avail_in = 0;
+ while (true)
+ {
+ int r;
+
+ r = deflate(tar_data->zp, Z_FINISH);
+
+ if (r == Z_STREAM_ERROR)
+ {
+ tar_set_error("deflate failed");
+ return false;
+ }
+ if (tar_data->zp->avail_out < ZLIB_OUT_SIZE)
+ {
+ size_t len = ZLIB_OUT_SIZE - tar_data->zp->avail_out;
+
+ if (write(tar_data->fd, tar_data->zlibOut, len) != len)
+ return false;
+ }
+ if (r == Z_STREAM_END)
+ break;
+ }
+
+ if (deflateEnd(tar_data->zp) != Z_OK)
+ {
+ tar_set_error("deflateEnd failed");
+ return false;
+ }
+ }
+#endif
+
+ /* sync the empty blocks as well, since they're after the last file */
+ fsync(tar_data->fd);
+
+ if (close(tar_data->fd) != 0)
+ return false;
+
+ tar_data->fd = -1;
+
+ if (tar_data->sync)
+ {
+ if (fsync_fname(tar_data->tarfilename, false, progname) != 0)
+ return false;
+ if (fsync_parent_path(tar_data->tarfilename, progname) != 0)
+ return false;
+ }
+
+ return true;
+}
+
+WalWriteMethod *
+CreateWalTarMethod(const char *tarbase, int compression, bool sync)
+{
+ WalWriteMethod *method;
+ const char *suffix = (compression != 0) ? ".tar.gz" : ".tar";
+
+ method = pg_malloc0(sizeof(WalWriteMethod));
+ method->open_for_write = tar_open_for_write;
+ method->write = tar_write;
+ method->get_current_pos = tar_get_current_pos;
+ method->get_file_size = tar_get_file_size;
+ method->close = tar_close;
+ method->fsync = tar_fsync;
+ method->existsfile = tar_existsfile;
+ method->finish = tar_finish;
+ method->getlasterror = tar_getlasterror;
+
+ tar_data = pg_malloc0(sizeof(TarMethodData));
+ tar_data->tarfilename = pg_malloc0(strlen(tarbase) + strlen(suffix) + 1);
+ sprintf(tar_data->tarfilename, "%s%s", tarbase, suffix);
+ tar_data->fd = -1;
+ tar_data->compression = compression;
+ tar_data->sync = sync;
+ if (compression)
+ tar_data->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1);
+
+ return method;
+}
diff --git a/src/bin/pg_basebackup/walmethods.h b/src/bin/pg_basebackup/walmethods.h
new file mode 100644
index 0000000000..fa58f812f6
--- /dev/null
+++ b/src/bin/pg_basebackup/walmethods.h
@@ -0,0 +1,45 @@
+/*-------------------------------------------------------------------------
+ *
+ * walmethods.h
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/bin/pg_basebackup/walmethods.h
+ *-------------------------------------------------------------------------
+ */
+
+
+typedef void *Walfile;
+
+typedef enum
+{
+ CLOSE_NORMAL,
+ CLOSE_UNLINK,
+ CLOSE_NO_RENAME,
+} WalCloseMethod;
+
+typedef struct WalWriteMethod WalWriteMethod;
+struct WalWriteMethod
+{
+ Walfile(*open_for_write) (const char *pathname, const char *temp_suffix, size_t pad_to_size);
+ int (*close) (Walfile f, WalCloseMethod method);
+ bool (*existsfile) (const char *pathname);
+ ssize_t (*get_file_size) (const char *pathname);
+
+ ssize_t (*write) (Walfile f, const void *buf, size_t count);
+ off_t (*get_current_pos) (Walfile f);
+ int (*fsync) (Walfile f);
+ bool (*finish) (void);
+ char *(*getlasterror) (void);
+};
+
+/*
+ * Available WAL methods:
+ * - WalDirectoryMethod - write WAL to regular files in a standard pg_xlog
+ * - WalTarMethod - write WAL to a tarfile corresponding to pg_xlog
+ * (only implements the methods required for pg_basebackup,
+ * not all those required for pg_receivexlog)
+ */
+WalWriteMethod *CreateWalDirectoryMethod(const char *basedir, bool sync);
+WalWriteMethod *CreateWalTarMethod(const char *tarbase, int compression, bool sync);
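As a quick orientation, a minimal sketch of how a caller might drive this
interface (illustrative only — the directory path, segment name handling,
and 16MB pad size are assumptions for the example, not taken from the
patch; note also the walmethods.c rule that only one method may be
instantiated per program):

    #include "walmethods.h"

    /* Sketch: write one pre-padded segment through the directory method. */
    static bool
    write_one_segment(const char *segname, const char *data, size_t len)
    {
        WalWriteMethod *wm = CreateWalDirectoryMethod("/tmp/walsink", true);
        Walfile f = wm->open_for_write(segname, ".partial", 16 * 1024 * 1024);

        if (f == NULL)
            return false;                    /* wm->getlasterror() explains */
        if (wm->write(f, data, len) != (ssize_t) len)
        {
            wm->close(f, CLOSE_UNLINK);      /* drop the partial file */
            return false;
        }
        if (wm->close(f, CLOSE_NORMAL) != 0) /* renames ".partial" into place */
            return false;
        return wm->finish();                 /* fsync the directory entry */
    }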
diff --git a/src/include/pgtar.h b/src/include/pgtar.h
index 45ca400f98..1d179f0df1 100644
--- a/src/include/pgtar.h
+++ b/src/include/pgtar.h
@@ -22,4 +22,5 @@ enum tarError
extern enum tarError tarCreateHeader(char *h, const char *filename, const char *linktarget,
pgoff_t size, mode_t mode, uid_t uid, gid_t gid, time_t mtime);
extern uint64 read_tar_number(const char *s, int len);
+extern void print_tar_number(char *s, int len, uint64 val);
extern int tarChecksum(char *header);
diff --git a/src/port/tar.c b/src/port/tar.c
index 52a2113a47..f1da959dac 100644
--- a/src/port/tar.c
+++ b/src/port/tar.c
@@ -16,7 +16,7 @@
* support only non-negative numbers, so we don't worry about the GNU rules
* for handling negative numbers.)
*/
-static void
+void
print_tar_number(char *s, int len, uint64 val)
{
if (val < (((uint64) 1) << ((len - 1) * 3)))
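To sanity-check that bound: a len-byte tar field holds len - 1 octal digits
plus a terminator, so only values below 8^(len-1) fit the octal form, with
larger values falling back to tar's base-256 encoding. For the 12-byte size
field that tar_close() fills in, the octal limit works out to 8GB — a small
standalone illustration (ours, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        int      len = 12;  /* width of the tar size field */
        uint64_t bound = ((uint64_t) 1) << ((len - 1) * 3); /* 8^(len-1) */

        printf("%llu\n", (unsigned long long) bound); /* 8589934592 = 8GB */
        return 0;
    }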
--
cgit v1.2.3
From 7d80417d3dfc88b0c03b5c08a18b29f9d430e217 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 23 Oct 2016 22:13:28 -0400
Subject: Release notes for 9.6.1, 9.5.5, 9.4.10, 9.3.15, 9.2.19, 9.1.24.
---
doc/src/sgml/release-9.1.sgml | 210 +++++++++++
doc/src/sgml/release-9.2.sgml | 266 ++++++++++++++
doc/src/sgml/release-9.3.sgml | 329 +++++++++++++++++
doc/src/sgml/release-9.4.sgml | 466 ++++++++++++++++++++++++
doc/src/sgml/release-9.5.sgml | 812 ++++++++++++++++++++++++++++++++++++++++++
doc/src/sgml/release-9.6.sgml | 586 ++----------------------------
6 files changed, 2103 insertions(+), 566 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.1.sgml b/doc/src/sgml/release-9.1.sgml
index 26b709056f..edacfbf355 100644
--- a/doc/src/sgml/release-9.1.sgml
+++ b/doc/src/sgml/release-9.1.sgml
@@ -1,6 +1,216 @@
+
+ Release 9.1.24
+
+
+ Release Date
+ 2016-10-27
+
+
+
+ This release contains a variety of fixes from 9.1.23.
+ For information about new features in the 9.1 major release, see
+ .
+
+
+
+ This is expected to be the last PostgreSQL> release
+ in the 9.1.X series. Users are encouraged to update to a newer
+ release branch soon.
+
+
+
+ Migration to Version 9.1.24
+
+
+ A dump/restore is not required for those running 9.1.X.
+
+
+
+ However, if you are upgrading from a version earlier than 9.1.16,
+ see .
+
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
+
+
+
+ The recheck would always see the CTE as returning no rows, typically
+ leading to failure to update rows that were recently updated.
+
+
+
+
+
+ Fix improper repetition of previous results from hashed aggregation in
+ a subquery (Andrew Gierth)
+
+
+
+ The test to see if we can reuse a previously-computed hash table of
+ the aggregate state values neglected the possibility of an outer query
+ reference appearing in an aggregate argument expression. A change in
+ the value of such a reference should lead to recalculating the hash
+ table, but did not.
+
+
+
+
+
+ Fix timeout length when VACUUM> is waiting for exclusive
+ table lock so that it can truncate the table (Simon Riggs)
+
+
+
+ The timeout was meant to be 50 milliseconds, but it was actually only
+ 50 microseconds, causing VACUUM> to give up on truncation
+ much more easily than intended. Set it to the intended value.
+
+
+
+
+
+ Remove artificial restrictions on the values accepted
+ by numeric_in()> and numeric_recv()>
+ (Tom Lane)
+
+
+
+ We allow numeric values up to the limit of the storage format (more
+ than 1e100000>), so it seems fairly pointless
+ that numeric_in()> rejected scientific-notation exponents
+ above 1000. Likewise, it was silly for numeric_recv()> to
+ reject more than 1000 digits in an input value.
+
+
+
+
+
+ Avoid very-low-probability data corruption due to testing tuple
+ visibility without holding buffer lock (Thomas Munro, Peter Geoghegan,
+ Tom Lane)
+
+
+
+
+
+ Fix file descriptor leakage when truncating a temporary relation of
+ more than 1GB (Andres Freund)
+
+
+
+
+
+ Disallow starting a standalone backend with standby_mode>
+ turned on (Michael Paquier)
+
+
+
+ This can't do anything useful, since there will be no WAL receiver
+ process to fetch more WAL data; and it could result in misbehavior
+ in code that wasn't designed with this situation in mind.
+
+
+
+
+
+ Don't try to share SSL contexts across multiple connections
+ in libpq> (Heikki Linnakangas)
+
+
+
+ This led to assorted corner-case bugs, particularly when trying to use
+ different SSL parameters for different connections.
+
+
+
+
+
+ Avoid corner-case memory leak in libpq> (Tom Lane)
+
+
+
+ The reported problem involved leaking an error report
+ during PQreset()>, but there might be related cases.
+
+
+
+
+
+ Make ecpg>'s --help> and --version>
+ options work consistently with our other executables (Haribabu Kommi)
+
+
+
+
+
+ Fix contrib/intarray/bench/bench.pl> to print the results
+ of the EXPLAIN> it does when given the -e> option
+ (Daniel Gustafsson)
+
+
+
+
+
+ Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
+
+
+
+ If a dynamic time zone abbreviation does not match any entry in the
+ referenced time zone, treat it as equivalent to the time zone name.
+ This avoids unexpected failures when IANA removes abbreviations from
+ their time zone database, as they did in tzdata>
+ release 2016f and seem likely to do again in the future. The
+ consequences were not limited to not recognizing the individual
+ abbreviation; any mismatch caused
+ the pg_timezone_abbrevs> view to fail altogether.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016h
+ for DST law changes in Palestine and Turkey, plus historical
+ corrections for Turkey and some regions of Russia.
+ Switch to numeric abbreviations for some time zones in Antarctica,
+ the former Soviet Union, and Sri Lanka.
+
+
+
+ The IANA time zone database previously provided textual abbreviations
+ for all time zones, sometimes making up abbreviations that have little
+ or no currency among the local population. They are in process of
+ reversing that policy in favor of using numeric UTC offsets in zones
+ where there is no evidence of real-world use of an English
+ abbreviation. At least for the time being, PostgreSQL>
+ will continue to accept such removed abbreviations for timestamp input.
+ But they will not be shown in the pg_timezone_names>
+ view nor used for output.
+
+
+
+ In this update, AMT> is no longer shown as being in use to
+ mean Armenia Time. Therefore, we have changed the Default>
+ abbreviation set to interpret it as Amazon Time, thus UTC-4 not UTC+4.
+
+
+
+
+
+
+
+
Release 9.1.23
diff --git a/doc/src/sgml/release-9.2.sgml b/doc/src/sgml/release-9.2.sgml
index 0f6e3d127f..49430389d9 100644
--- a/doc/src/sgml/release-9.2.sgml
+++ b/doc/src/sgml/release-9.2.sgml
@@ -1,6 +1,272 @@
+
+ Release 9.2.19
+
+
+ Release Date
+ 2016-10-27
+
+
+
+ This release contains a variety of fixes from 9.2.18.
+ For information about new features in the 9.2 major release, see
+ .
+
+
+
+ Migration to Version 9.2.19
+
+
+ A dump/restore is not required for those running 9.2.X.
+
+
+
+ However, if you are upgrading from a version earlier than 9.2.11,
+ see .
+
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
+
+
+
+ The recheck would always see the CTE as returning no rows, typically
+ leading to failure to update rows that were recently updated.
+
+
+
+
+
+ Fix improper repetition of previous results from hashed aggregation in
+ a subquery (Andrew Gierth)
+
+
+
+ The test to see if we can reuse a previously-computed hash table of
+ the aggregate state values neglected the possibility of an outer query
+ reference appearing in an aggregate argument expression. A change in
+ the value of such a reference should lead to recalculating the hash
+ table, but did not.
+
+
+
+
+
+ Fix EXPLAIN> to emit valid XML when
+ is on (Markus Winand)
+
+
+
+ Previously the XML output-format option produced syntactically invalid
+ tags such as <I/O-Read-Time>>. That is now
+ rendered as <I-O-Read-Time>>.
+
+
+
+
+
+ Suppress printing of zeroes for unmeasured times
+ in EXPLAIN> (Maksim Milyutin)
+
+
+
+ Certain option combinations resulted in printing zero values for times
+ that actually aren't ever measured in that combination. Our general
+ policy in EXPLAIN> is not to print such fields at all, so
+ do that consistently in all cases.
+
+
+
+
+
+ Fix timeout length when VACUUM> is waiting for exclusive
+ table lock so that it can truncate the table (Simon Riggs)
+
+
+
+ The timeout was meant to be 50 milliseconds, but it was actually only
+ 50 microseconds, causing VACUUM> to give up on truncation
+ much more easily than intended. Set it to the intended value.
+
+
+
+
+
+ Fix bugs in merging inherited CHECK> constraints while
+ creating or altering a table (Tom Lane, Amit Langote)
+
+
+
+ Allow identical CHECK> constraints to be added to a parent
+ and child table in either order. Prevent merging of a valid
+ constraint from the parent table with a NOT VALID>
+ constraint on the child. Likewise, prevent merging of a NO
+ INHERIT> child constraint with an inherited constraint.
+
+
+
+
+
+ Remove artificial restrictions on the values accepted
+ by numeric_in()> and numeric_recv()>
+ (Tom Lane)
+
+
+
+ We allow numeric values up to the limit of the storage format (more
+ than 1e100000>), so it seems fairly pointless
+ that numeric_in()> rejected scientific-notation exponents
+ above 1000. Likewise, it was silly for numeric_recv()> to
+ reject more than 1000 digits in an input value.
+
+
+
+
+
+ Avoid very-low-probability data corruption due to testing tuple
+ visibility without holding buffer lock (Thomas Munro, Peter Geoghegan,
+ Tom Lane)
+
+
+
+
+
+ Fix file descriptor leakage when truncating a temporary relation of
+ more than 1GB (Andres Freund)
+
+
+
+
+
+ Disallow starting a standalone backend with standby_mode>
+ turned on (Michael Paquier)
+
+
+
+ This can't do anything useful, since there will be no WAL receiver
+ process to fetch more WAL data; and it could result in misbehavior
+ in code that wasn't designed with this situation in mind.
+
+
+
+
+
+ Don't try to share SSL contexts across multiple connections
+ in libpq> (Heikki Linnakangas)
+
+
+
+ This led to assorted corner-case bugs, particularly when trying to use
+ different SSL parameters for different connections.
+
+
+
+
+
+ Avoid corner-case memory leak in libpq> (Tom Lane)
+
+
+
+ The reported problem involved leaking an error report
+ during PQreset()>, but there might be related cases.
+
+
+
+
+
+ Make ecpg>'s --help> and --version>
+ options work consistently with our other executables (Haribabu Kommi)
+
+
+
+
+
+ In pg_dump>, never dump range constructor functions
+ (Tom Lane)
+
+
+
+ This oversight led to pg_upgrade> failures with
+ extensions containing range types, due to duplicate creation of the
+ constructor functions.
+
+
+
+
+
+ Fix contrib/intarray/bench/bench.pl> to print the results
+ of the EXPLAIN> it does when given the -e> option
+ (Daniel Gustafsson)
+
+
+
+
+
+ Update Windows time zone mapping to recognize some time zone names
+ added in recent Windows versions (Michael Paquier)
+
+
+
+
+
+ Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
+
+
+
+ If a dynamic time zone abbreviation does not match any entry in the
+ referenced time zone, treat it as equivalent to the time zone name.
+ This avoids unexpected failures when IANA removes abbreviations from
+ their time zone database, as they did in tzdata>
+ release 2016f and seem likely to do again in the future. The
+ consequences were not limited to not recognizing the individual
+ abbreviation; any mismatch caused
+ the pg_timezone_abbrevs> view to fail altogether.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016h
+ for DST law changes in Palestine and Turkey, plus historical
+ corrections for Turkey and some regions of Russia.
+ Switch to numeric abbreviations for some time zones in Antarctica,
+ the former Soviet Union, and Sri Lanka.
+
+
+
+ The IANA time zone database previously provided textual abbreviations
+ for all time zones, sometimes making up abbreviations that have little
+ or no currency among the local population. They are in process of
+ reversing that policy in favor of using numeric UTC offsets in zones
+ where there is no evidence of real-world use of an English
+ abbreviation. At least for the time being, PostgreSQL>
+ will continue to accept such removed abbreviations for timestamp input.
+ But they will not be shown in the pg_timezone_names>
+ view nor used for output.
+
+
+
+ In this update, AMT> is no longer shown as being in use to
+ mean Armenia Time. Therefore, we have changed the Default>
+ abbreviation set to interpret it as Amazon Time, thus UTC-4 not UTC+4.
+
+
+
+
+
+
+
+
Release 9.2.18
diff --git a/doc/src/sgml/release-9.3.sgml b/doc/src/sgml/release-9.3.sgml
index e321f4b31c..81205a40c7 100644
--- a/doc/src/sgml/release-9.3.sgml
+++ b/doc/src/sgml/release-9.3.sgml
@@ -1,6 +1,335 @@
+
+ Release 9.3.15
+
+
+ Release Date
+ 2016-10-27
+
+
+
+ This release contains a variety of fixes from 9.3.14.
+ For information about new features in the 9.3 major release, see
+ .
+
+
+
+ Migration to Version 9.3.15
+
+
+ A dump/restore is not required for those running 9.3.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted free space maps.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.3.9,
+ see .
+
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix WAL-logging of truncation of relation free space maps and
+ visibility maps (Pavan Deolasee, Heikki Linnakangas)
+
+
+
+ It was possible for these files to not be correctly restored during
+ crash recovery, or to be written incorrectly on a standby server.
+ Bogus entries in a free space map could lead to attempts to access
+ pages that have been truncated away from the relation itself, typically
+ producing errors like could not read block XXX>:
+ read only 0 of 8192 bytes. Checksum failures in the
+ visibility map are also possible, if checksumming is enabled.
+
+
+
+ Procedures for determining whether there is a problem and repairing it
+ if so are discussed at
+ >.
+
+
+
+
+
+ Fix SELECT FOR UPDATE/SHARE> to correctly lock tuples that
+ have been updated by a subsequently-aborted transaction
+ (Álvaro Herrera)
+
+
+
+ In 9.5 and later, the SELECT> would sometimes fail to
+ return such tuples at all. A failure has not been proven to occur in
+ earlier releases, but might be possible with concurrent updates.
+
+
+
+
+
+ Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
+
+
+
+ The recheck would always see the CTE as returning no rows, typically
+ leading to failure to update rows that were recently updated.
+
+
+
+
+
+ Fix improper repetition of previous results from hashed aggregation in
+ a subquery (Andrew Gierth)
+
+
+
+ The test to see if we can reuse a previously-computed hash table of
+ the aggregate state values neglected the possibility of an outer query
+ reference appearing in an aggregate argument expression. A change in
+ the value of such a reference should lead to recalculating the hash
+ table, but did not.
+
+
+
+
+
+ Fix EXPLAIN> to emit valid XML when
+ is on (Markus Winand)
+
+
+
+ Previously the XML output-format option produced syntactically invalid
+ tags such as <I/O-Read-Time>>. That is now
+ rendered as <I-O-Read-Time>>.
+
+
+
+
+
+ Suppress printing of zeroes for unmeasured times
+ in EXPLAIN> (Maksim Milyutin)
+
+
+
+ Certain option combinations resulted in printing zero values for times
+ that actually aren't ever measured in that combination. Our general
+ policy in EXPLAIN> is not to print such fields at all, so
+ do that consistently in all cases.
+
+
+
+
+
+ Fix timeout length when VACUUM> is waiting for exclusive
+ table lock so that it can truncate the table (Simon Riggs)
+
+
+
+ The timeout was meant to be 50 milliseconds, but it was actually only
+ 50 microseconds, causing VACUUM> to give up on truncation
+ much more easily than intended. Set it to the intended value.
+
+
+
+
+
+ Fix bugs in merging inherited CHECK> constraints while
+ creating or altering a table (Tom Lane, Amit Langote)
+
+
+
+ Allow identical CHECK> constraints to be added to a parent
+ and child table in either order. Prevent merging of a valid
+ constraint from the parent table with a NOT VALID>
+ constraint on the child. Likewise, prevent merging of a NO
+ INHERIT> child constraint with an inherited constraint.
+
+
+
+
+
+ Remove artificial restrictions on the values accepted
+ by numeric_in()> and numeric_recv()>
+ (Tom Lane)
+
+
+
+ We allow numeric values up to the limit of the storage format (more
+ than 1e100000>), so it seems fairly pointless
+ that numeric_in()> rejected scientific-notation exponents
+ above 1000. Likewise, it was silly for numeric_recv()> to
+ reject more than 1000 digits in an input value.
+
+
+
+
+
+ Avoid very-low-probability data corruption due to testing tuple
+ visibility without holding buffer lock (Thomas Munro, Peter Geoghegan,
+ Tom Lane)
+
+
+
+
+
+ Fix file descriptor leakage when truncating a temporary relation of
+ more than 1GB (Andres Freund)
+
+
+
+
+
+ Disallow starting a standalone backend with standby_mode>
+ turned on (Michael Paquier)
+
+
+
+ This can't do anything useful, since there will be no WAL receiver
+ process to fetch more WAL data; and it could result in misbehavior
+ in code that wasn't designed with this situation in mind.
+
+
+
+
+
+ Don't try to share SSL contexts across multiple connections
+ in libpq> (Heikki Linnakangas)
+
+
+
+ This led to assorted corner-case bugs, particularly when trying to use
+ different SSL parameters for different connections.
+
+
+
+
+
+ Avoid corner-case memory leak in libpq> (Tom Lane)
+
+
+
+ The reported problem involved leaking an error report
+ during PQreset()>, but there might be related cases.
+
+
+
+
+
+ Make ecpg>'s --help> and --version>
+ options work consistently with our other executables (Haribabu Kommi)
+
+
+
+
+
+ In pg_dump>, never dump range constructor functions
+ (Tom Lane)
+
+
+
+ This oversight led to pg_upgrade> failures with
+ extensions containing range types, due to duplicate creation of the
+ constructor functions.
+
+
+
+
+
+ In pg_xlogdump>, retry opening new WAL segments when
+ using --follow> option (Magnus Hagander)
+
+
+
+ This allows for a possible delay in the server's creation of the next
+ segment.
+
+
+
+
+
+ Fix pg_xlogdump> to cope with a WAL file that begins
+ with a continuation record spanning more than one page (Pavan
+ Deolasee)
+
+
+
+
+
+ Fix contrib/intarray/bench/bench.pl> to print the results
+ of the EXPLAIN> it does when given the -e> option
+ (Daniel Gustafsson)
+
+
+
+
+
+ Update Windows time zone mapping to recognize some time zone names
+ added in recent Windows versions (Michael Paquier)
+
+
+
+
+
+ Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
+
+
+
+ If a dynamic time zone abbreviation does not match any entry in the
+ referenced time zone, treat it as equivalent to the time zone name.
+ This avoids unexpected failures when IANA removes abbreviations from
+ their time zone database, as they did in tzdata>
+ release 2016f and seem likely to do again in the future. The
+ consequences were not limited to not recognizing the individual
+ abbreviation; any mismatch caused
+ the pg_timezone_abbrevs> view to fail altogether.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016h
+ for DST law changes in Palestine and Turkey, plus historical
+ corrections for Turkey and some regions of Russia.
+ Switch to numeric abbreviations for some time zones in Antarctica,
+ the former Soviet Union, and Sri Lanka.
+
+
+
+ The IANA time zone database previously provided textual abbreviations
+ for all time zones, sometimes making up abbreviations that have little
+ or no currency among the local population. They are in process of
+ reversing that policy in favor of using numeric UTC offsets in zones
+ where there is no evidence of real-world use of an English
+ abbreviation. At least for the time being, PostgreSQL>
+ will continue to accept such removed abbreviations for timestamp input.
+ But they will not be shown in the pg_timezone_names>
+ view nor used for output.
+
+
+
+ In this update, AMT> is no longer shown as being in use to
+ mean Armenia Time. Therefore, we have changed the Default>
+ abbreviation set to interpret it as Amazon Time, thus UTC-4 not UTC+4.
+
+
+
+
+
+
+
+
Release 9.3.14
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml
index 51896924c9..94b028a065 100644
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -1,6 +1,472 @@
+
+ Release 9.4.10
+
+
+ Release Date
+ 2016-10-27
+
+
+
+ This release contains a variety of fixes from 9.4.9.
+ For information about new features in the 9.4 major release, see
+ .
+
+
+
+ Migration to Version 9.4.10
+
+
+ A dump/restore is not required for those running 9.4.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted free space maps.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.4.6,
+ see .
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix WAL-logging of truncation of relation free space maps and
+ visibility maps (Pavan Deolasee, Heikki Linnakangas)
+
+
+
+ It was possible for these files to not be correctly restored during
+ crash recovery, or to be written incorrectly on a standby server.
+ Bogus entries in a free space map could lead to attempts to access
+ pages that have been truncated away from the relation itself, typically
+ producing errors like could not read block XXX>:
+ read only 0 of 8192 bytes. Checksum failures in the
+ visibility map are also possible, if checksumming is enabled.
+
+
+
+ Procedures for determining whether there is a problem and repairing it
+ if so are discussed at
+ >.
+
+
+
+
+
+ Fix incorrect creation of GIN index WAL records on big-endian machines
+ (Tom Lane)
+
+
+
+ The typical symptom was unexpected GIN leaf action> errors
+ during WAL replay.
+
+
+
+
+
+ Fix SELECT FOR UPDATE/SHARE> to correctly lock tuples that
+ have been updated by a subsequently-aborted transaction
+ (Álvaro Herrera)
+
+
+
+ In 9.5 and later, the SELECT> would sometimes fail to
+ return such tuples at all. A failure has not been proven to occur in
+ earlier releases, but might be possible with concurrent updates.
+
+
+
+
+
+ Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
+
+
+
+ The recheck would always see the CTE as returning no rows, typically
+ leading to failure to update rows that were recently updated.
+
+
+
+
+
+ Fix improper repetition of previous results from hashed aggregation in
+ a subquery (Andrew Gierth)
+
+
+
+ The test to see if we can reuse a previously-computed hash table of
+ the aggregate state values neglected the possibility of an outer query
+ reference appearing in an aggregate argument expression. A change in
+ the value of such a reference should lead to recalculating the hash
+ table, but did not.
+
+
+
+
+
+ Fix query-lifespan memory leak in a bulk UPDATE> on a table
+ with a PRIMARY KEY> or REPLICA IDENTITY> index
+ (Tom Lane)
+
+
+
+
+
+ Fix EXPLAIN> to emit valid XML when
+ is on (Markus Winand)
+
+
+
+ Previously the XML output-format option produced syntactically invalid
+ tags such as <I/O-Read-Time>>. That is now
+ rendered as <I-O-Read-Time>>.
+
+
+
+
+
+ Suppress printing of zeroes for unmeasured times
+ in EXPLAIN> (Maksim Milyutin)
+
+
+
+ Certain option combinations resulted in printing zero values for times
+ that actually aren't ever measured in that combination. Our general
+ policy in EXPLAIN> is not to print such fields at all, so
+ do that consistently in all cases.
+
+
+
+
+
+ Fix timeout length when VACUUM> is waiting for exclusive
+ table lock so that it can truncate the table (Simon Riggs)
+
+
+
+ The timeout was meant to be 50 milliseconds, but it was actually only
+ 50 microseconds, causing VACUUM> to give up on truncation
+ much more easily than intended. Set it to the intended value.
+
+
+
+
+
+ Fix bugs in merging inherited CHECK> constraints while
+ creating or altering a table (Tom Lane, Amit Langote)
+
+
+
+ Allow identical CHECK> constraints to be added to a parent
+ and child table in either order. Prevent merging of a valid
+ constraint from the parent table with a NOT VALID>
+ constraint on the child. Likewise, prevent merging of a NO
+ INHERIT> child constraint with an inherited constraint.
+
+
+
+
+
+ Remove artificial restrictions on the values accepted
+ by numeric_in()> and numeric_recv()>
+ (Tom Lane)
+
+
+
+ We allow numeric values up to the limit of the storage format (more
+ than 1e100000>), so it seems fairly pointless
+ that numeric_in()> rejected scientific-notation exponents
+ above 1000. Likewise, it was silly for numeric_recv()> to
+ reject more than 1000 digits in an input value.
+
+
+
+
+
+ Avoid very-low-probability data corruption due to testing tuple
+ visibility without holding buffer lock (Thomas Munro, Peter Geoghegan,
+ Tom Lane)
+
+
+
+
+
+ Fix logical WAL decoding to work properly when a subtransaction's WAL
+ output is large enough to spill to disk (Andres Freund)
+
+
+
+
+
+
+ Fix buffer overread in logical WAL decoding (Tom Lane)
+
+
+
+ Logical decoding of a tuple update record read 23 bytes too many,
+ which was usually harmless but with very bad luck could result in a
+ crash.
+
+
+
+
+
+ Fix file descriptor leakage when truncating a temporary relation of
+ more than 1GB (Andres Freund)
+
+
+
+
+
+ Disallow starting a standalone backend with standby_mode>
+ turned on (Michael Paquier)
+
+
+
+ This can't do anything useful, since there will be no WAL receiver
+ process to fetch more WAL data; and it could result in misbehavior
+ in code that wasn't designed with this situation in mind.
+
+
+
+
+
+ Properly initialize replication slot state when recycling a
+ previously-used slot (Michael Paquier)
+
+
+
+ This failure to reset all of the fields of the slot could
+ prevent VACUUM> from removing dead tuples.
+
+
+
+
+
+ Round shared-memory allocation request to a multiple of the actual
+ huge page size when attempting to use huge pages on Linux (Tom Lane)
+
+
+
+ This avoids possible failures during munmap()> on systems
+ with atypical default huge page sizes. Except in crash-recovery
+ cases, there were no ill effects other than a log message.
+
+
+
+
+
+ Use a more random value for the dynamic shared memory control
+ segment's ID (Robert Haas, Tom Lane)
+
+
+
+ Previously, the same value would be chosen every time, because it was
+ derived from random()> but srandom()> had not
+ yet been called. While relatively harmless, this was not the intended
+ behavior.
+
+
+
+
+
+ On Windows, retry creation of the dynamic shared memory control
+ segment after an access-denied error (Kyotaro Horiguchi, Amit Kapila)
+
+
+
+ Windows sometimes returns ERROR_ACCESS_DENIED> rather
+ than ERROR_ALREADY_EXISTS> when there is an existing
+ segment. This led to postmaster startup failure due to believing that
+ the former was an unrecoverable error.
+
+
+
+
+
+ Don't try to share SSL contexts across multiple connections
+ in libpq> (Heikki Linnakangas)
+
+
+
+ This led to assorted corner-case bugs, particularly when trying to use
+ different SSL parameters for different connections.
+
+
+
+
+
+ Avoid corner-case memory leak in libpq> (Tom Lane)
+
+
+
+ The reported problem involved leaking an error report
+ during PQreset()>, but there might be related cases.
+
+
+
+
+
+ Make ecpg>'s --help> and --version>
+ options work consistently with our other executables (Haribabu Kommi)
+
+
+
+
+
+ Fix pgbench>'s calculation of average latency
+ (Fabien Coelho)
+
+
+
+ The calculation was incorrect when there were \sleep>
+ commands in the script, or when the test duration was specified in
+ number of transactions rather than total time.
+
+
+
+
+
+ In pg_dump>, never dump range constructor functions
+ (Tom Lane)
+
+
+
+ This oversight led to pg_upgrade> failures with
+ extensions containing range types, due to duplicate creation of the
+ constructor functions.
+
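+ For background (illustrative), creating a range type implicitly
+ creates its constructor functions, so dumping them separately caused
+ duplicate creation at restore time:
+
+CREATE TYPE floatrange AS RANGE (subtype = float8);
+SELECT floatrange(1.0, 2.0); -- constructor created automatically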
+
+
+
+
+ In pg_xlogdump>, retry opening new WAL segments when
+ using the --follow> option (Magnus Hagander)
+
+
+
+ This allows for a possible delay in the server's creation of the next
+ segment.
+
+
+
+
+
+ Fix pg_xlogdump> to cope with a WAL file that begins
+ with a continuation record spanning more than one page (Pavan
+ Deolasee)
+
+
+
+
+
+ Fix contrib/pg_buffercache> to work
+ when shared_buffers> exceeds 256GB (KaiGai Kohei)
+
+
+
+
+
+ Fix contrib/intarray/bench/bench.pl> to print the results
+ of the EXPLAIN> it does when given the -e> option
+ (Daniel Gustafsson)
+
+
+
+
+
+ Install TAP test infrastructure so that it's available for extension
+ testing (Craig Ringer)
+
+
+
+ When PostgreSQL> has been configured
+ with --enable-tap-tests>, make install> will now
+ install the Perl support files for TAP testing where PGXS can find
+ them. This allows non-core extensions to
+ use $(prove_check)> without extra tests.
+
+
+
+
+
+ In MSVC builds, include pg_recvlogical> in a
+ client-only installation (MauMau)
+
+
+
+
+
+ Update Windows time zone mapping to recognize some time zone names
+ added in recent Windows versions (Michael Paquier)
+
+
+
+
+
+ Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
+
+
+
+ If a dynamic time zone abbreviation does not match any entry in the
+ referenced time zone, treat it as equivalent to the time zone name.
+ This avoids unexpected failures when IANA removes abbreviations from
+ their time zone database, as they did in tzdata>
+ release 2016f and seem likely to do again in the future. The
+ consequences were not limited to not recognizing the individual
+ abbreviation; any mismatch caused
+ the pg_timezone_abbrevs> view to fail altogether.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016h
+ for DST law changes in Palestine and Turkey, plus historical
+ corrections for Turkey and some regions of Russia.
+ Switch to numeric abbreviations for some time zones in Antarctica,
+ the former Soviet Union, and Sri Lanka.
+
+
+
+ The IANA time zone database previously provided textual abbreviations
+ for all time zones, sometimes making up abbreviations that have little
+ or no currency among the local population. They are in the process of
+ reversing that policy in favor of using numeric UTC offsets in zones
+ where there is no evidence of real-world use of an English
+ abbreviation. At least for the time being, PostgreSQL>
+ will continue to accept such removed abbreviations for timestamp input.
+ But they will not be shown in the pg_timezone_names>
+ view nor used for output.
+
+
+
+ In this update, AMT> is no longer shown as being in use to
+ mean Armenia Time. Therefore, we have changed the Default>
+ abbreviation set to interpret it as Amazon Time, thus UTC-4 not UTC+4.
+
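+ For example, with the Default> abbreviation set active:
+
+SELECT '2016-11-01 12:00 AMT'::timestamptz; -- now read as UTC-4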
+
+
+
+
+
+
+
Release 9.4.9
diff --git a/doc/src/sgml/release-9.5.sgml b/doc/src/sgml/release-9.5.sgml
index c3f0f7051e..2102b1dd29 100644
--- a/doc/src/sgml/release-9.5.sgml
+++ b/doc/src/sgml/release-9.5.sgml
@@ -1,6 +1,818 @@
+
+ Release 9.5.5
+
+
+ Release Date
+ 2016-10-27
+
+
+
+ This release contains a variety of fixes from 9.5.4.
+ For information about new features in the 9.5 major release, see
+ .
+
+
+
+ Migration to Version 9.5.5
+
+
+ A dump/restore is not required for those running 9.5.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted free space maps.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.5.2,
+ see .
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix WAL-logging of truncation of relation free space maps and
+ visibility maps (Pavan Deolasee, Heikki Linnakangas)
+
+
+
+ It was possible for these files to not be correctly restored during
+ crash recovery, or to be written incorrectly on a standby server.
+ Bogus entries in a free space map could lead to attempts to access
+ pages that have been truncated away from the relation itself, typically
+ producing errors like could not read block XXX>:
+ read only 0 of 8192 bytes. Checksum failures in the
+ visibility map are also possible, if checksumming is enabled.
+
+
+
+ Procedures for determining whether there is a problem and repairing it
+ if so are discussed at
+ >.
+
+
+
+
+
+
+ Fix incorrect creation of GIN index WAL records on big-endian machines
+ (Tom Lane)
+
+
+
+ The typical symptom was unexpected GIN leaf action> errors
+ during WAL replay.
+
+
+
+
+
+
+ Fix SELECT FOR UPDATE/SHARE> to correctly lock tuples that
+ have been updated by a subsequently-aborted transaction
+ (Álvaro Herrera)
+
+
+
+ In 9.5 and later, the SELECT> would sometimes fail to
+ return such tuples at all. A failure has not been proven to occur in
+ earlier releases, but might be possible with concurrent updates.
+
+
+
+
+
+
+ Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
+
+
+
+ The recheck would always see the CTE as returning no rows, typically
+ leading to failure to update rows that were recently updated.
+
+
+
+
+
+
+ Fix deletion of speculatively inserted TOAST tuples when backing out
+ of INSERT ... ON CONFLICT> (Oskari Saarenmaa)
+
+
+
+ In the race condition where two transactions try to insert conflicting
+ tuples at about the same time, the loser would fail with
+ an attempted to delete invisible tuple> error if its
+ insertion included any TOAST'ed fields.
+
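+ Schematically (hypothetical names), two sessions racing on the same
+ key with a large, TOAST-able value hit the failure; the losing insert
+ now backs out cleanly:
+
+INSERT INTO t (k, big) VALUES (1, repeat('x', 1000000))
+ ON CONFLICT (k) DO NOTHING;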
+
+
+
+
+ Don't throw serialization errors for self-conflicting insertions
+ in INSERT ... ON CONFLICT> (Thomas Munro, Peter Geoghegan)
+
+
+
+
+
+
+ Fix improper repetition of previous results from hashed aggregation in
+ a subquery (Andrew Gierth)
+
+
+
+ The test to see if we can reuse a previously-computed hash table of
+ the aggregate state values neglected the possibility of an outer query
+ reference appearing in an aggregate argument expression. A change in
+ the value of such a reference should lead to recalculating the hash
+ table, but did not.
+
+
+
+
+
+
+ Fix query-lifespan memory leak in a bulk UPDATE> on a table
+ with a PRIMARY KEY> or REPLICA IDENTITY> index
+ (Tom Lane)
+
+
+
+
+
+ Fix COPY> with a column name list from a table that has
+ row-level security enabled (Adam Brightwell)
+
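+ For example (hypothetical table), this form previously failed when
+ row-level security was enabled on the table:
+
+COPY rls_tab (id, note) TO STDOUT;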
+
+
+
+
+ Fix EXPLAIN> to emit valid XML when
+ track_io_timing> is on (Markus Winand)
+
+
+
+ Previously the XML output-format option produced syntactically invalid
+ tags such as <I/O-Read-Time>>. That is now
+ rendered as <I-O-Read-Time>>.
+
+
+
+
+
+
+ Suppress printing of zeroes for unmeasured times
+ in EXPLAIN> (Maksim Milyutin)
+
+
+
+ Certain option combinations resulted in printing zero values for times
+ that actually aren't ever measured in that combination. Our general
+ policy in EXPLAIN> is not to print such fields at all, so
+ do that consistently in all cases.
+
+
+
+
+
+ Fix statistics update for TRUNCATE> in a prepared
+ transaction (Stas Kelvich)
+
+
+
+
+
+
+ Fix timeout length when VACUUM> is waiting for exclusive
+ table lock so that it can truncate the table (Simon Riggs)
+
+
+
+ The timeout was meant to be 50 milliseconds, but it was actually only
+ 50 microseconds, causing VACUUM> to give up on truncation
+ much more easily than intended. Set it to the intended value.
+
+
+
+
+
+ Fix bugs in merging inherited CHECK> constraints while
+ creating or altering a table (Tom Lane, Amit Langote)
+
+
+
+ Allow identical CHECK> constraints to be added to a parent
+ and child table in either order. Prevent merging of a valid
+ constraint from the parent table with a NOT VALID>
+ constraint on the child. Likewise, prevent merging of a NO
+ INHERIT> child constraint with an inherited constraint.
+
+
+
+
+
+ Show a sensible value
+ in pg_settings>.unit>
+ for min_wal_size> and max_wal_size> (Tom Lane)
+
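+ For example (illustrative), the unit> column now reads
+ sensibly for these settings:
+
+SELECT name, unit FROM pg_settings
+ WHERE name IN ('min_wal_size', 'max_wal_size');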
+
+
+
+
+
+ Remove artificial restrictions on the values accepted
+ by numeric_in()> and numeric_recv()>
+ (Tom Lane)
+
+
+
+ We allow numeric values up to the limit of the storage format (more
+ than 1e100000>), so it seems fairly pointless
+ that numeric_in()> rejected scientific-notation exponents
+ above 1000. Likewise, it was silly for numeric_recv()> to
+ reject more than 1000 digits in an input value.
+
+
+
+
+
+ Avoid very-low-probability data corruption due to testing tuple
+ visibility without holding buffer lock (Thomas Munro, Peter Geoghegan,
+ Tom Lane)
+
+
+
+
+
+ Fix logical WAL decoding to work properly when a subtransaction's WAL
+ output is large enough to spill to disk (Andres Freund)
+
+
+
+
+
+
+ Fix possible sorting error when aborting use of abbreviated keys
+ (Peter Geoghegan)
+
+
+
+ In the worst case, this could result in a corrupt btree index, which
+ would need to be rebuilt using REINDEX>. However, the
+ situation is believed to be rare.
+
+
+
+
+
+
+ Fix file descriptor leakage when truncating a temporary relation of
+ more than 1GB (Andres Freund)
+
+
+
+
+
+
+ Disallow starting a standalone backend with standby_mode>
+ turned on (Michael Paquier)
+
+
+
+ This can't do anything useful, since there will be no WAL receiver
+ process to fetch more WAL data; and it could result in misbehavior
+ in code that wasn't designed with this situation in mind.
+
+
+
+
+
+
+ Properly initialize replication slot state when recycling a
+ previously-used slot (Michael Paquier)
+
+
+
+ This failure to reset all of the fields of the slot could
+ prevent VACUUM> from removing dead tuples.
+
+
+
+
+
+ Round shared-memory allocation request to a multiple of the actual
+ huge page size when attempting to use huge pages on Linux (Tom Lane)
+
+
+
+ This avoids possible failures during munmap()> on systems
+ with atypical default huge page sizes. Except in crash-recovery
+ cases, there were no ill effects other than a log message.
+
+
+
+
+
+
+ Use a more random value for the dynamic shared memory control
+ segment's ID (Robert Haas, Tom Lane)
+
+
+
+ Previously, the same value would be chosen every time, because it was
+ derived from random()> but srandom()> had not
+ yet been called. While relatively harmless, this was not the intended
+ behavior.
+
+
+
+
+
+
+ On Windows, retry creation of the dynamic shared memory control
+ segment after an access-denied error (Kyotaro Horiguchi, Amit Kapila)
+
+
+
+ Windows sometimes returns ERROR_ACCESS_DENIED> rather
+ than ERROR_ALREADY_EXISTS> when there is an existing
+ segment. This led to postmaster startup failure due to believing that
+ the former was an unrecoverable error.
+
+
+
+
+
+
+ Fix PL/pgSQL> to not misbehave with parameters and
+ local variables of type int2vector> or oidvector>
+ (Tom Lane)
+
+
+
+
+
+ Don't try to share SSL contexts across multiple connections
+ in libpq> (Heikki Linnakangas)
+
+
+
+ This led to assorted corner-case bugs, particularly when trying to use
+ different SSL parameters for different connections.
+
+
+
+
+
+ Avoid corner-case memory leak in libpq> (Tom Lane)
+
+
+
+ The reported problem involved leaking an error report
+ during PQreset()>, but there might be related cases.
+
+
+
+
+
+
+ Make ecpg>'s --help> and --version>
+ options work consistently with our other executables (Haribabu Kommi)
+
+
+
+
+
+
+ Fix pgbench>'s calculation of average latency
+ (Fabien Coelho)
+
+
+
+ The calculation was incorrect when there were \sleep>
+ commands in the script, or when the test duration was specified in
+ number of transactions rather than total time.
+
+
+
+
+
+ In pg_upgrade>, check library loadability in name order
+ (Tom Lane)
+
+
+
+ This is a workaround to deal with cross-extension dependencies from
+ language transform modules to their base language and data type
+ modules.
+
+
+
+
+
+
+ In pg_dump>, never dump range constructor functions
+ (Tom Lane)
+
+
+
+ This oversight led to pg_upgrade> failures with
+ extensions containing range types, due to duplicate creation of the
+ constructor functions.
+
+
+
+
+
+
+ In pg_dump> with -C>,
+ suppress TABLESPACE> clause of CREATE DATABASE>
+ if --no-tablespaces> is specified (Tom Lane)
+
+
+
+
+
+
+ Make pg_receivexlog> work correctly
+ with --synchronous> without slots (Gabriele Bartolini)
+
+
+
+
+
+ Disallow specifying both --source-server>
+ and --source-pgdata> options to pg_rewind>
+ (Michael Banck)
+
+
+
+
+
+ Make pg_rewind> turn off synchronous_commit>
+ in its session on the source server (Michael Banck, Michael Paquier)
+
+
+
+ This allows pg_rewind> to work even when the source
+ server is using synchronous replication that is not working for some
+ reason.
+
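+ The effect is as if pg_rewind>'s session issued the
+ following before its queries (shown for illustration):
+
+SET synchronous_commit = off;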
+
+
+
+
+ In pg_xlogdump>, retry opening new WAL segments when
+ using the --follow> option (Magnus Hagander)
+
+
+
+ This allows for a possible delay in the server's creation of the next
+ segment.
+
+
+
+
+
+
+ Fix pg_xlogdump> to cope with a WAL file that begins
+ with a continuation record spanning more than one page (Pavan
+ Deolasee)
+
+
+
+
+
+
+ Fix contrib/pg_buffercache> to work
+ when shared_buffers> exceeds 256GB (KaiGai Kohei)
+
+
+
+
+
+
+ Fix contrib/intarray/bench/bench.pl> to print the results
+ of the EXPLAIN> it does when given the -e> option
+ (Daniel Gustafsson)
+
+
+
+
+
+
+ Support OpenSSL 1.1.0 (Heikki Linnakangas)
+
+
+
+
+
+
+ Install TAP test infrastructure so that it's available for extension
+ testing (Craig Ringer)
+
+
+
+ When PostgreSQL> has been configured
+ with --enable-tap-tests>, make install> will now
+ install the Perl support files for TAP testing where PGXS can find
+ them. This allows non-core extensions to
+ use $(prove_check)> without extra tests.
+
+
+
+
+
+
+ In MSVC builds, include pg_recvlogical> in a
+ client-only installation (MauMau)
+
+
+
+
+
+
+ Update Windows time zone mapping to recognize some time zone names
+ added in recent Windows versions (Michael Paquier)
+
+
+
+
+
+
+ Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
+
+
+
+ If a dynamic time zone abbreviation does not match any entry in the
+ referenced time zone, treat it as equivalent to the time zone name.
+ This avoids unexpected failures when IANA removes abbreviations from
+ their time zone database, as they did in tzdata>
+ release 2016f and seem likely to do again in the future. The
+ consequences were not limited to not recognizing the individual
+ abbreviation; any mismatch caused
+ the pg_timezone_abbrevs> view to fail altogether.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016h
+ for DST law changes in Palestine and Turkey, plus historical
+ corrections for Turkey and some regions of Russia.
+ Switch to numeric abbreviations for some time zones in Antarctica,
+ the former Soviet Union, and Sri Lanka.
+
+
+
+ The IANA time zone database previously provided textual abbreviations
+ for all time zones, sometimes making up abbreviations that have little
+ or no currency among the local population. They are in the process of
+ reversing that policy in favor of using numeric UTC offsets in zones
+ where there is no evidence of real-world use of an English
+ abbreviation. At least for the time being, PostgreSQL>
+ will continue to accept such removed abbreviations for timestamp input.
+ But they will not be shown in the pg_timezone_names>
+ view nor used for output.
+
+
+
+ In this update, AMT> is no longer shown as being in use to
+ mean Armenia Time. Therefore, we have changed the Default>
+ abbreviation set to interpret it as Amazon Time, thus UTC-4 not UTC+4.
+
+
+
+
+
+
+
+
Release 9.5.4
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index ebdeda4445..8b3f51428d 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -62,7 +62,7 @@ Branch: REL9_3_STABLE [1c02ee314] 2016-10-19 15:00:34 +0300
Procedures for determining whether there is a problem and repairing it
if so are discussed at
- >.
+ >.
@@ -96,89 +96,20 @@ Branch: REL9_6_STABLE [b6d906073] 2016-09-30 20:39:06 -0400
with contrib/pg_visibility>'s
pg_truncate_visibility_map()> function.
For more information see
- >.
-
-
-
-
-
-
- Fix incorrect creation of GIN index WAL records on big-endian machines
- (Tom Lane)
-
-
-
- The typical symptom was unexpected GIN leaf action> errors
- during WAL replay.
-
-
-
-
-
-
- Fix SELECT FOR UPDATE/SHARE> to correctly lock tuples that
- have been updated by a subsequently-aborted transaction
- (Álvaro Herrera)
-
-
-
- In 9.5 and later, the SELECT> would sometimes fail to
- return such tuples at all. A failure has not been proven to occur in
- earlier releases, but might be possible with concurrent updates.
+ >.
- Fix EvalPlanQual rechecks involving CTE scans (Tom Lane)
-
-
-
- The recheck would always see the CTE as returning no rows, typically
- leading to failure to update rows that were recently updated.
-
-
-
-
-
-
- Fix deletion of speculatively inserted TOAST tuples when backing out
- of INSERT ... ON CONFLICT> (Oskari Saarenmaa)
-
-
-
- In the race condition where two transactions try to insert conflicting
- tuples at about the same time, the loser would fail with
- an attempted to delete invisible tuple> error if its
- insertion included any TOAST'ed fields.
+ Don't throw serialization errors for self-conflicting insertions
+ in INSERT ... ON CONFLICT> (Thomas Munro, Peter Geoghegan)
@@ -219,46 +150,6 @@ Branch: REL9_6_STABLE [dca25c256] 2016-10-09 12:49:37 -0400
-
- Fix improper repetition of previous results from hashed aggregation in
- a subquery (Andrew Gierth)
-
-
-
- The test to see if we can reuse a previously-computed hash table of
- the aggregate state values neglected the possibility of an outer query
- reference appearing in an aggregate argument expression. A change in
- the value of such a reference should lead to recalculating the hash
- table, but did not.
-
-
-
-
-
-
- Fix query-lifespan memory leak in a bulk UPDATE> on a table
- with a PRIMARY KEY> or REPLICA IDENTITY> index
- (Tom Lane)
-
-
-
-
-
-
- Suppress printing of zeroes for unmeasured times
- in EXPLAIN> (Maksim Milyutin)
-
-
-
- Certain option combinations resulted in printing zero values for times
- that actually aren't ever measured in that combination. Our general
- policy in EXPLAIN> is not to print such fields at all, so
- do that consistently in all cases.
-
-
-
-
-
-
- Fix timeout length when VACUUM> is waiting for exclusive
- table lock so that it can truncate the table (Simon Riggs)
-
-
-
- The timeout was meant to be 50 milliseconds, but it was actually only
- 50 microseconds, causing VACUUM> to give up on truncation
- much more easily than intended. Set it to the intended value.
-
-
-
-
-
- Remove artificial restrictions on the values accepted
- by numeric_in()> and numeric_recv()>
- (Tom Lane)
-
-
-
- We allow numeric values up to the limit of the storage format (more
- than 1e100000>), so it seems fairly pointless
- that numeric_in()> rejected scientific-notation exponents
- above 1000. Likewise, it was silly for numeric_recv()> to
- reject more than 1000 digits in an input value.
+ Avoid very-low-probability data corruption due to testing tuple
+ visibility without holding buffer lock (Thomas Munro, Peter Geoghegan,
+ Tom Lane)
@@ -464,100 +306,6 @@ Branch: REL9_6_STABLE [32841fa32] 2016-09-28 11:22:39 -0400
-
- Fix buffer overread in logical WAL decoding (Tom Lane)
-
-
-
- Logical decoding of a tuple update record read 23 bytes too many,
- which was usually harmless but with very bad luck could result in a
- crash.
-
-
-
-
-
-
- Fix possible sorting error when aborting use of abbreviated keys
- (Peter Geoghegan)
-
-
-
- In the worst case, this could result in a corrupt btree index, which
- would need to be rebuilt using REINDEX>. However, the
- situation is believed to be rare.
-
-
-
-
-
-
- Fix file descriptor leakage when truncating a temporary relation of
- more than 1GB (Andres Freund)
-
-
-
-
-
-
- Disallow starting a standalone backend with standby_mode>
- turned on (Michael Paquier)
-
-
-
- This can't do anything useful, since there will be no WAL receiver
- process to fetch more WAL data; and it could result in misbehavior
- in code that wasn't designed with this situation in mind.
-
-
-
-
-
-
- Properly initialize replication slot state when recycling a
- previously-used slot (Michael Paquier)
-
-
-
- This failure to reset all of the fields of the slot could
- prevent VACUUM> from removing dead tuples.
-
-
-
-
-
-
- Use a more random value for the dynamic shared memory control
- segment's ID (Robert Haas, Tom Lane)
-
-
-
- Previously, the same value would be chosen every time, because it was
- derived from random()> but srandom()> had not
- yet been called. While relatively harmless, this was not the intended
- behavior.
-
-
-
-
-
-
- On Windows, retry creation of the dynamic shared memory control
- segment after an access-denied error (Kyotaro Horiguchi, Amit Kapila)
-
-
-
- Windows sometimes returns ERROR_ACCESS_DENIED> rather
- than ERROR_ALREADY_EXISTS> when there is an existing
- segment. This led to postmaster startup failure due to believing that
- the former was an unrecoverable error.
-
-
-
-
-
-
- Fix PL/pgSQL> to not misbehave with parameters and
- local variables of type int2vector> or oidvector>
- (Tom Lane)
-
-
-
-
-
-
- Make ecpg>'s --help> and --version>
- options work consistently with our other executables (Haribabu Kommi)
-
-
-
-
-
-
- Fix pgbench>'s calculation of average latency
- (Fabien Coelho)
-
-
-
- The calculation was incorrect when there were \sleep>
- commands in the script, or when the test duration was specified in
- number of transactions rather than total time.
-
-
-
-
-
-
- In pg_dump>, never dump range constructor functions
- (Tom Lane)
-
-
-
- This oversight led to pg_upgrade> failures with
- extensions containing range types, due to duplicate creation of the
- constructor functions.
-
-
-
-
-
-
- In pg_dump> with -C>,
- suppress TABLESPACE> clause of CREATE DATABASE>
- if --no-tablespaces> is specified (Tom Lane)
-
-
-
-
-
@@ -827,20 +437,6 @@ Branch: REL9_6_STABLE [1749332ec] 2016-10-07 09:51:28 -0400
-
- Make pg_receivexlog> work correctly
- with --synchronous> without slots (Gabriele Bartolini)
-
-
-
-
-
-
- Fix pg_xlogdump> to cope with a WAL file that begins
- with a continuation record spanning more than one page (Pavan
- Deolasee)
-
-
-
-
-
-
- Fix contrib/pg_buffercache> to work
- when shared_buffers> exceeds 256GB (KaiGai Kohei)
-
-
-
-
-
-
- Fix contrib/intarray/bench/bench.pl> to print the results
- of the EXPLAIN> it does when given the -e> option
- (Daniel Gustafsson)
-
-
-
-
-
-
- Support OpenSSL 1.1.0 (Heikki Linnakangas)
-
-
-
-
-
-
- Install TAP test infrastructure so that it's available for extension
- testing (Craig Ringer)
-
-
-
- When PostgreSQL> has been configured
- with --enable-tap-tests>, make install> will now
- install the Perl support files for TAP testing where PGXS can find
- them. This allows non-core extensions to
- use $(prove_check)> without extra tests.
-
-
-
-
-
-
- In MSVC builds, include pg_recvlogical> in a
- client-only installation (MauMau)
-
-
-
-
-
-
- Update Windows time zone mapping to recognize some time zone names
- added in recent Windows versions (Michael Paquier)
-
-
-
-
-
-
- Prevent failure of obsolete dynamic time zone abbreviations (Tom Lane)
-
-
-
- If a dynamic time zone abbreviation does not match any entry in the
- referenced time zone, treat it as equivalent to the time zone name.
- This avoids unexpected failures when IANA removes abbreviations from
- their time zone database, as they did in tzdata>
- release 2016f and seem likely to do again in the future. The
- consequences were not limited to not recognizing the individual
- abbreviation; any mismatch caused
- the pg_timezone_abbrevs> view to fail altogether.
-
-
-
-
-
+
+ Preserve commit timestamps across server restart
+ (Julien Rouhaud, Craig Ringer)
+
+
+
+ With track_commit_timestamp> turned on, old
+ commit timestamps became inaccessible after a clean server restart.
+
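+ For example (hypothetical table), with the setting enabled, a query
+ like the following now keeps working for transactions that committed
+ before the restart:
+
+SELECT pg_xact_commit_timestamp(xmin) FROM accounts LIMIT 1;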
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--
cgit v1.2.3
From 59fa9d2d9da46097dd4da5c5f1f07e22a288fccf Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 14 Oct 2016 12:00:00 -0400
Subject: pg_test_timing: Add NLS
Also straighten out use of time unit abbreviations a bit.
Reviewed-by: Michael Paquier
---
doc/src/sgml/ref/pgtesttiming.sgml | 16 ++++++++--------
src/bin/pg_test_timing/nls.mk | 4 ++++
src/bin/pg_test_timing/pg_test_timing.c | 28 ++++++++++++++++------------
3 files changed, 28 insertions(+), 20 deletions(-)
create mode 100644 src/bin/pg_test_timing/nls.mk
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pgtesttiming.sgml b/doc/src/sgml/ref/pgtesttiming.sgml
index f07a0600ff..e3539cf764 100644
--- a/doc/src/sgml/ref/pgtesttiming.sgml
+++ b/doc/src/sgml/ref/pgtesttiming.sgml
@@ -96,9 +96,9 @@
Testing timing overhead for 3 seconds.
-Per loop time including overhead: 35.96 nsec
+Per loop time including overhead: 35.96 ns
Histogram of timing durations:
-< usec % of total count
+ < us % of total count
1 96.40465 80435604
2 3.59518 2999652
4 0.00015 126
@@ -109,9 +109,9 @@ Histogram of timing durations:
Note that different units are used for the per loop time than the
- histogram. The loop can have resolution within a few nanoseconds (nsec),
+ histogram. The loop can have resolution within a few nanoseconds (ns),
while the individual timing calls can only resolve down to one microsecond
- (usec).
+ (us).
@@ -157,9 +157,9 @@ EXPLAIN ANALYZE SELECT COUNT(*) FROM t;
tsc hpet acpi_pm
# echo acpi_pm > /sys/devices/system/clocksource/clocksource0/current_clocksource
# pg_test_timing
-Per loop time including overhead: 722.92 nsec
+Per loop time including overhead: 722.92 ns
Histogram of timing durations:
-< usec % of total count
+ < us % of total count
1 27.84870 1155682
2 72.05956 2990371
4 0.07810 3241
@@ -170,7 +170,7 @@ Histogram of timing durations:
In this configuration, the sample EXPLAIN ANALYZE above
- takes 115.9 ms. That's 1061 nsec of timing overhead, again a small multiple
+ takes 115.9 ms. That's 1061 ns of timing overhead, again a small multiple
of what's measured directly by this utility. That much timing overhead
means the actual query itself is only taking a tiny fraction of the
accounted for time, most of it is being consumed in overhead instead. In
@@ -211,7 +211,7 @@ $ pg_test_timing
Testing timing overhead for 3 seconds.
Per timing duration including loop overhead: 97.75 ns
Histogram of timing durations:
-< usec % of total count
+ < us % of total count
1 90.23734 27694571
2 9.75277 2993204
4 0.00981 3010
diff --git a/src/bin/pg_test_timing/nls.mk b/src/bin/pg_test_timing/nls.mk
new file mode 100644
index 0000000000..e12ea5cfdb
--- /dev/null
+++ b/src/bin/pg_test_timing/nls.mk
@@ -0,0 +1,4 @@
+# src/bin/pg_test_timing/nls.mk
+CATALOG_NAME = pg_test_timing
+AVAIL_LANGUAGES =
+GETTEXT_FILES = pg_test_timing.c
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index e5c11de6bb..2f1ab7cd60 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -25,6 +25,7 @@ main(int argc, char *argv[])
{
uint64 loop_count;
+ set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_timing"));
progname = get_progname(argv[0]);
handle_args(argc, argv);
@@ -51,7 +52,7 @@ handle_args(int argc, char *argv[])
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
- printf("Usage: %s [-d DURATION]\n", progname);
+ printf(_("Usage: %s [-d DURATION]\n"), progname);
exit(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
@@ -71,7 +72,7 @@ handle_args(int argc, char *argv[])
break;
default:
- fprintf(stderr, "Try \"%s --help\" for more information.\n",
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
exit(1);
break;
@@ -81,23 +82,26 @@ handle_args(int argc, char *argv[])
if (argc > optind)
{
fprintf(stderr,
- "%s: too many command-line arguments (first is \"%s\")\n",
+ _("%s: too many command-line arguments (first is \"%s\")\n"),
progname, argv[optind]);
- fprintf(stderr, "Try \"%s --help\" for more information.\n",
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
exit(1);
}
if (test_duration > 0)
{
- printf("Testing timing overhead for %d seconds.\n", test_duration);
+ printf(ngettext("Testing timing overhead for %d second.\n",
+ "Testing timing overhead for %d seconds.\n",
+ test_duration),
+ test_duration);
}
else
{
fprintf(stderr,
- "%s: duration must be a positive integer (duration is \"%d\")\n",
+ _("%s: duration must be a positive integer (duration is \"%d\")\n"),
progname, test_duration);
- fprintf(stderr, "Try \"%s --help\" for more information.\n",
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
exit(1);
}
@@ -133,8 +137,8 @@ test_timing(int32 duration)
/* Did time go backwards? */
if (diff < 0)
{
- printf("Detected clock going backwards in time.\n");
- printf("Time warp: %d microseconds\n", diff);
+ fprintf(stderr, _("Detected clock going backwards in time.\n"));
+ fprintf(stderr, _("Time warp: %d ms\n"), diff);
exit(1);
}
@@ -157,7 +161,7 @@ test_timing(int32 duration)
INSTR_TIME_SUBTRACT(end_time, start_time);
- printf("Per loop time including overhead: %0.2f nsec\n",
+ printf(_("Per loop time including overhead: %0.2f ns\n"),
INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count);
return loop_count;
@@ -173,8 +177,8 @@ output(uint64 loop_count)
while (max_bit > 0 && histogram[max_bit] == 0)
max_bit--;
- printf("Histogram of timing durations:\n");
- printf("%6s %10s %10s\n", "< usec", "% of total", "count");
+ printf(_("Histogram of timing durations:\n"));
+ printf("%6s %10s %10s\n", _("< us"), _("% of total"), _("count"));
for (i = 0; i <= max_bit; i++)
{
--
cgit v1.2.3
From 8c48375e5f43ebd832f93c9166d1fe0e639ff806 Mon Sep 17 00:00:00 2001
From: Kevin Grittner
Date: Fri, 4 Nov 2016 10:49:50 -0500
Subject: Implement syntax for transition tables in AFTER triggers.
This is infrastructure for the complete SQL standard feature. No
support is included at this point for execution nodes or PLs. The
intent is to add that soon.
As this patch leaves things, standard syntax can create tuplestores
to contain old and/or new versions of rows affected by a statement.
References to these tuplestores are in the TriggerData structure.
C triggers can access the tuplestores directly, so they are usable,
but they cannot yet be referenced within a SQL statement.
---
doc/src/sgml/catalogs.sgml | 16 ++
doc/src/sgml/ref/create_trigger.sgml | 94 ++++++++--
src/backend/commands/tablecmds.c | 5 +-
src/backend/commands/trigger.c | 327 +++++++++++++++++++++++++++++++++--
src/backend/nodes/copyfuncs.c | 16 ++
src/backend/nodes/equalfuncs.c | 14 ++
src/backend/nodes/outfuncs.c | 13 ++
src/backend/parser/gram.y | 70 +++++++-
src/backend/utils/adt/ruleutils.c | 23 +++
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_trigger.h | 13 +-
src/include/commands/trigger.h | 2 +
src/include/nodes/nodes.h | 1 +
src/include/nodes/parsenodes.h | 17 ++
src/include/parser/kwlist.h | 3 +
src/include/utils/reltrigger.h | 7 +
16 files changed, 580 insertions(+), 43 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 29738b07cb..bac169a19e 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -6231,6 +6231,22 @@
representation) for the trigger's WHEN> condition, or null
if none
+
+
+ tgoldtable
+ name
+
+ REFERENCING> clause name for OLD TABLE>,
+ or null if none
+
+
+
+ tgnewtable
+ name
+
+ REFERENCING> clause name for NEW TABLE>,
+ or null if none
+
diff --git a/doc/src/sgml/ref/create_trigger.sgml b/doc/src/sgml/ref/create_trigger.sgml
index 4bde815012..8590e226e3 100644
--- a/doc/src/sgml/ref/create_trigger.sgml
+++ b/doc/src/sgml/ref/create_trigger.sgml
@@ -25,6 +25,7 @@ CREATE [ CONSTRAINT ] TRIGGER name
ON table_name
[ FROM referenced_table_name ]
[ NOT DEFERRABLE | [ DEFERRABLE ] [ INITIALLY IMMEDIATE | INITIALLY DEFERRED ] ]
+ [ REFERENCING { { OLD | NEW } TABLE [ AS ] transition_relation_name } [ ... ] ]
[ FOR [ EACH ] { ROW | STATEMENT } ]
[ WHEN ( condition ) ]
EXECUTE PROCEDURE function_name ( arguments )
@@ -177,6 +178,15 @@ CREATE [ CONSTRAINT ] TRIGGER name
when the constraints they implement are violated.
+
+ The REFERENCING> option is only allowed for an AFTER>
+ trigger which is not a constraint trigger. OLD TABLE> may only
+ be specified once, and only on a trigger which can fire on
+ UPDATE> or DELETE>. NEW TABLE> may only
+ be specified once, and only on a trigger which can fire on
+ UPDATE> or INSERT>.
+
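+ For example, a sketch of the rules above (hypothetical table and
+ function names):
+
+-- allowed: NEW TABLE on an AFTER INSERT trigger
+CREATE TRIGGER t_ok AFTER INSERT ON tab
+ REFERENCING NEW TABLE AS new_rows
+ FOR EACH STATEMENT EXECUTE PROCEDURE fn();
+
+-- rejected: OLD TABLE cannot be used with an INSERT-only trigger
+CREATE TRIGGER t_bad AFTER INSERT ON tab
+ REFERENCING OLD TABLE AS old_rows
+ FOR EACH STATEMENT EXECUTE PROCEDURE fn();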
+
SELECT does not modify any rows so you cannot
create SELECT triggers. Rules and views are more
@@ -281,6 +291,40 @@ UPDATE OF column_name1 [, column_name2
+
+ REFERENCING
+
+
+ This immediately precedes the declaration of one or two relations which
+ can be used to read the before and/or after images of all rows directly
+ affected by the triggering statement. An AFTER EACH ROW>
+ trigger is allowed to use both these transition relation names and the
+ row names (OLD> and NEW>) which reference each
+ individual row for which the trigger fires.
+
+
+
+
+
+ OLD TABLE
+ NEW TABLE
+
+
+ This specifies whether the named relation contains the before or after
+ images for rows affected by the statement which fired the trigger.
+
+
+
+
+
+ transition_relation_name
+
+
+ The (unqualified) name to be used within the trigger for this relation.
+
+
+
+
FOR EACH ROWFOR EACH STATEMENT
@@ -474,6 +518,30 @@ CREATE TRIGGER view_insert
FOR EACH ROW
EXECUTE PROCEDURE view_insert_row();
+
+ Execute the function check_transfer_balances_to_zero> for each
+ statement to confirm that the transfer> rows offset to a net of
+ zero:
+
+
+CREATE TRIGGER transfer_insert
+ AFTER INSERT ON transfer
+ FOR EACH STATEMENT
+ REFERENCING NEW TABLE AS inserted
+ EXECUTE PROCEDURE check_transfer_balances_to_zero();
+
+
+ Execute the function check_matching_pairs> for each row to
+ confirm that changes are made to matching pairs at the same time (by the
+ same statement):
+
+
+CREATE TRIGGER paired_items_update
+ AFTER UPDATE ON paired_items
+ FOR EACH ROW
+ REFERENCING NEW TABLE AS newtab OLD TABLE AS oldtab
+ EXECUTE PROCEDURE check_matching_pairs();
+
@@ -502,24 +570,14 @@ CREATE TRIGGER view_insert
- SQL allows you to define aliases for the old
- and new rows or tables for use in the definition
- of the triggered action (e.g., CREATE TRIGGER ... ON
- tablename REFERENCING OLD ROW AS somename NEW ROW AS othername
- ...). Since PostgreSQL
- allows trigger procedures to be written in any number of
- user-defined languages, access to the data is handled in a
- language-specific way.
-
-
-
-
-
- PostgreSQL does not allow the old and new
- tables to be referenced in statement-level triggers, i.e., the tables
- that contain all the old and/or new rows, which are referred to by the
- OLD TABLE and NEW TABLE clauses in
- the SQL> standard.
+ While transition tables for AFTER> triggers are specified
+ using the REFERENCING> clause in the standard way, the row
+ variables used in FOR EACH ROW> triggers may not be
+ specified in a REFERENCING> clause. They are available in a
+ manner that depends on the language in which the trigger function
+ is written. Some languages effectively behave as though there is a
+ REFERENCING> clause containing OLD ROW AS OLD NEW
+ ROW AS NEW>.
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 2137372c23..f97bee5b0e 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -7430,7 +7430,7 @@ validateForeignKeyConstraint(char *conname,
trig.tgconstraint = constraintOid;
trig.tgdeferrable = FALSE;
trig.tginitdeferred = FALSE;
- /* we needn't fill in tgargs or tgqual */
+ /* we needn't fill in remaining fields */
/*
* See if we can do it with a single LEFT JOIN query. A FALSE result
@@ -7514,6 +7514,7 @@ CreateFKCheckTrigger(Oid myRelOid, Oid refRelOid, Constraint *fkconstraint,
}
fk_trigger->columns = NIL;
+ fk_trigger->transitionRels = NIL;
fk_trigger->whenClause = NULL;
fk_trigger->isconstraint = true;
fk_trigger->deferrable = fkconstraint->deferrable;
@@ -7557,6 +7558,7 @@ createForeignKeyTriggers(Relation rel, Oid refRelOid, Constraint *fkconstraint,
fk_trigger->timing = TRIGGER_TYPE_AFTER;
fk_trigger->events = TRIGGER_TYPE_DELETE;
fk_trigger->columns = NIL;
+ fk_trigger->transitionRels = NIL;
fk_trigger->whenClause = NULL;
fk_trigger->isconstraint = true;
fk_trigger->constrrel = NULL;
@@ -7611,6 +7613,7 @@ createForeignKeyTriggers(Relation rel, Oid refRelOid, Constraint *fkconstraint,
fk_trigger->timing = TRIGGER_TYPE_AFTER;
fk_trigger->events = TRIGGER_TYPE_UPDATE;
fk_trigger->columns = NIL;
+ fk_trigger->transitionRels = NIL;
fk_trigger->whenClause = NULL;
fk_trigger->isconstraint = true;
fk_trigger->constrrel = NULL;
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 9de22a13d7..1c264b7736 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -164,6 +164,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
Oid constrrelid = InvalidOid;
ObjectAddress myself,
referenced;
+ char *oldtablename = NULL;
+ char *newtablename = NULL;
if (OidIsValid(relOid))
rel = heap_open(relOid, ShareRowExclusiveLock);
@@ -309,6 +311,87 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
errmsg("INSTEAD OF triggers cannot have column lists")));
}
+ /*
+ * We don't yet support naming ROW transition variables, but the parser
+ * recognizes the syntax so we can give a nicer message here.
+ *
+ * Per standard, REFERENCING TABLE names are only allowed on AFTER
+ * triggers. Per standard, REFERENCING ROW names are not allowed with FOR
+ * EACH STATEMENT. Per standard, each OLD/NEW, ROW/TABLE permutation is
+ * only allowed once. Per standard, OLD may not be specified when
+ * creating a trigger only for INSERT, and NEW may not be specified when
+ * creating a trigger only for DELETE.
+ *
+ * Notice that the standard allows an AFTER ... FOR EACH ROW trigger to
+ * reference both ROW and TABLE transition data.
+ */
+ if (stmt->transitionRels != NIL)
+ {
+ List *varList = stmt->transitionRels;
+ ListCell *lc;
+
+ foreach(lc, varList)
+ {
+ TriggerTransition *tt = (TriggerTransition *) lfirst(lc);
+
+ Assert(IsA(tt, TriggerTransition));
+
+ if (!(tt->isTable))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ROW variable naming in the REFERENCING clause is not supported"),
+ errhint("Use OLD TABLE or NEW TABLE for naming transition tables.")));
+
+ /*
+ * Because of the above test, we omit further ROW-related testing
+ * below. If we later allow naming OLD and NEW ROW variables,
+ * adjustments will be needed below.
+ */
+
+ if (stmt->timing != TRIGGER_TYPE_AFTER)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("transition table name can only be specified for an AFTER trigger")));
+
+ if (tt->isNew)
+ {
+ if (!(TRIGGER_FOR_INSERT(tgtype) ||
+ TRIGGER_FOR_UPDATE(tgtype)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("NEW TABLE can only be specified for an INSERT or UPDATE trigger")));
+
+ if (newtablename != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("NEW TABLE cannot be specified multiple times")));
+
+ newtablename = tt->name;
+ }
+ else
+ {
+ if (!(TRIGGER_FOR_DELETE(tgtype) ||
+ TRIGGER_FOR_UPDATE(tgtype)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("OLD TABLE can only be specified for a DELETE or UPDATE trigger")));
+
+ if (oldtablename != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("OLD TABLE cannot be specified multiple times")));
+
+ oldtablename = tt->name;
+ }
+ }
+
+ if (newtablename != NULL && oldtablename != NULL &&
+ strcmp(newtablename, oldtablename) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("OLD TABLE name and NEW TABLE name cannot be the same")));
+ }
+
/*
* Parse the WHEN clause, if any
*/
@@ -664,6 +747,17 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
else
nulls[Anum_pg_trigger_tgqual - 1] = true;
+ if (oldtablename)
+ values[Anum_pg_trigger_tgoldtable - 1] = DirectFunctionCall1(namein,
+ CStringGetDatum(oldtablename));
+ else
+ nulls[Anum_pg_trigger_tgoldtable - 1] = true;
+ if (newtablename)
+ values[Anum_pg_trigger_tgnewtable - 1] = DirectFunctionCall1(namein,
+ CStringGetDatum(newtablename));
+ else
+ nulls[Anum_pg_trigger_tgnewtable - 1] = true;
+
tuple = heap_form_tuple(tgrel->rd_att, values, nulls);
/* force tuple to have the desired OID */
@@ -682,6 +776,10 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
pfree(DatumGetPointer(values[Anum_pg_trigger_tgname - 1]));
pfree(DatumGetPointer(values[Anum_pg_trigger_tgargs - 1]));
pfree(DatumGetPointer(values[Anum_pg_trigger_tgattr - 1]));
+ if (oldtablename)
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgoldtable - 1]));
+ if (newtablename)
+ pfree(DatumGetPointer(values[Anum_pg_trigger_tgnewtable - 1]));
/*
* Update relation's pg_class entry. Crucial side-effect: other backends
@@ -1584,6 +1682,23 @@ RelationBuildTriggers(Relation relation)
}
else
build->tgargs = NULL;
+
+ datum = fastgetattr(htup, Anum_pg_trigger_tgoldtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ build->tgoldtable =
+ DatumGetCString(DirectFunctionCall1(nameout, datum));
+ else
+ build->tgoldtable = NULL;
+
+ datum = fastgetattr(htup, Anum_pg_trigger_tgnewtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ build->tgnewtable =
+ DatumGetCString(DirectFunctionCall1(nameout, datum));
+ else
+ build->tgnewtable = NULL;
+
datum = fastgetattr(htup, Anum_pg_trigger_tgqual,
tgrel->rd_att, &isnull);
if (!isnull)
@@ -1680,6 +1795,19 @@ SetTriggerFlags(TriggerDesc *trigdesc, Trigger *trigger)
trigdesc->trig_truncate_after_statement |=
TRIGGER_TYPE_MATCHES(tgtype, TRIGGER_TYPE_STATEMENT,
TRIGGER_TYPE_AFTER, TRIGGER_TYPE_TRUNCATE);
+
+ trigdesc->trig_insert_new_table |=
+ (TRIGGER_FOR_INSERT(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable));
+ trigdesc->trig_update_old_table |=
+ (TRIGGER_FOR_UPDATE(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable));
+ trigdesc->trig_update_new_table |=
+ (TRIGGER_FOR_UPDATE(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgnewtable));
+ trigdesc->trig_delete_old_table |=
+ (TRIGGER_FOR_DELETE(tgtype) &&
+ TRIGGER_USES_TRANSITION_TABLE(trigger->tgoldtable));
}
/*
@@ -1729,6 +1857,10 @@ CopyTriggerDesc(TriggerDesc *trigdesc)
}
if (trigger->tgqual)
trigger->tgqual = pstrdup(trigger->tgqual);
+ if (trigger->tgoldtable)
+ trigger->tgoldtable = pstrdup(trigger->tgoldtable);
+ if (trigger->tgnewtable)
+ trigger->tgnewtable = pstrdup(trigger->tgnewtable);
trigger++;
}
@@ -1761,6 +1893,10 @@ FreeTriggerDesc(TriggerDesc *trigdesc)
}
if (trigger->tgqual)
pfree(trigger->tgqual);
+ if (trigger->tgoldtable)
+ pfree(trigger->tgoldtable);
+ if (trigger->tgnewtable)
+ pfree(trigger->tgnewtable);
trigger++;
}
pfree(trigdesc->triggers);
@@ -1839,6 +1975,18 @@ equalTriggerDescs(TriggerDesc *trigdesc1, TriggerDesc *trigdesc2)
return false;
else if (strcmp(trig1->tgqual, trig2->tgqual) != 0)
return false;
+ if (trig1->tgoldtable == NULL && trig2->tgoldtable == NULL)
+ /* ok */ ;
+ else if (trig1->tgoldtable == NULL || trig2->tgoldtable == NULL)
+ return false;
+ else if (strcmp(trig1->tgoldtable, trig2->tgoldtable) != 0)
+ return false;
+ if (trig1->tgnewtable == NULL && trig2->tgnewtable == NULL)
+ /* ok */ ;
+ else if (trig1->tgnewtable == NULL || trig2->tgnewtable == NULL)
+ return false;
+ else if (strcmp(trig1->tgnewtable, trig2->tgnewtable) != 0)
+ return false;
}
}
else if (trigdesc2 != NULL)
@@ -1870,6 +2018,18 @@ ExecCallTriggerFunc(TriggerData *trigdata,
Datum result;
MemoryContext oldContext;
+ /*
+ * Protect against code paths that may fail to initialize transition table
+ * info.
+ */
+ Assert(((TRIGGER_FIRED_BY_INSERT(trigdata->tg_event) ||
+ TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event) ||
+ TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) &&
+ TRIGGER_FIRED_AFTER(trigdata->tg_event) &&
+ !(trigdata->tg_event & AFTER_TRIGGER_DEFERRABLE) &&
+ !(trigdata->tg_event & AFTER_TRIGGER_INITDEFERRED)) ||
+ (trigdata->tg_oldtable == NULL && trigdata->tg_newtable == NULL));
+
finfo += tgindx;
/*
@@ -1960,6 +2120,8 @@ ExecBSInsertTriggers(EState *estate, ResultRelInfo *relinfo)
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_trigtuple = NULL;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
@@ -2017,6 +2179,8 @@ ExecBRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TRIGGER_EVENT_BEFORE;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
{
@@ -2070,7 +2234,8 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
- if (trigdesc && trigdesc->trig_insert_after_row)
+ if (trigdesc &&
+ (trigdesc->trig_insert_after_row || trigdesc->trig_insert_new_table))
AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
true, NULL, trigtuple, recheckIndexes, NULL);
}
@@ -2092,6 +2257,8 @@ ExecIRInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TRIGGER_EVENT_INSTEAD;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
{
@@ -2159,6 +2326,8 @@ ExecBSDeleteTriggers(EState *estate, ResultRelInfo *relinfo)
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_trigtuple = NULL;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
@@ -2230,6 +2399,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
TRIGGER_EVENT_BEFORE;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
{
@@ -2273,7 +2444,8 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
- if (trigdesc && trigdesc->trig_delete_after_row)
+ if (trigdesc &&
+ (trigdesc->trig_delete_after_row || trigdesc->trig_delete_old_table))
{
HeapTuple trigtuple;
@@ -2310,6 +2482,8 @@ ExecIRDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
TRIGGER_EVENT_INSTEAD;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
{
@@ -2363,6 +2537,8 @@ ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_trigtuple = NULL;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
@@ -2464,6 +2640,8 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
TRIGGER_EVENT_ROW |
TRIGGER_EVENT_BEFORE;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
updatedCols = GetUpdatedColumns(relinfo, estate);
for (i = 0; i < trigdesc->numtriggers; i++)
{
@@ -2528,7 +2706,8 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
- if (trigdesc && trigdesc->trig_update_after_row)
+ if (trigdesc && (trigdesc->trig_update_after_row ||
+ trigdesc->trig_update_old_table || trigdesc->trig_update_new_table))
{
HeapTuple trigtuple;
@@ -2567,6 +2746,8 @@ ExecIRUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
TRIGGER_EVENT_ROW |
TRIGGER_EVENT_INSTEAD;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
for (i = 0; i < trigdesc->numtriggers; i++)
{
Trigger *trigger = &trigdesc->triggers[i];
@@ -2635,6 +2816,8 @@ ExecBSTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
LocTriggerData.tg_trigtuple = NULL;
LocTriggerData.tg_newtuple = NULL;
+ LocTriggerData.tg_oldtable = NULL;
+ LocTriggerData.tg_newtable = NULL;
LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
LocTriggerData.tg_newtuplebuf = InvalidBuffer;
for (i = 0; i < trigdesc->numtriggers; i++)
@@ -3163,8 +3346,11 @@ typedef struct AfterTriggerEventList
* fdw_tuplestores[query_depth] is a tuplestore containing the foreign tuples
* needed for the current query.
*
- * maxquerydepth is just the allocated length of query_stack and
- * fdw_tuplestores.
+ * old_tuplestores[query_depth] and new_tuplestores[query_depth] hold the
+ * transition relations for the current query.
+ *
+ * maxquerydepth is just the allocated length of query_stack and the
+ * tuplestores.
*
* state_stack is a stack of pointers to saved copies of the SET CONSTRAINTS
* state data; each subtransaction level that modifies that state first
@@ -3193,7 +3379,9 @@ typedef struct AfterTriggersData
AfterTriggerEventList events; /* deferred-event list */
int query_depth; /* current query list index */
AfterTriggerEventList *query_stack; /* events pending from each query */
- Tuplestorestate **fdw_tuplestores; /* foreign tuples from each query */
+ Tuplestorestate **fdw_tuplestores; /* foreign tuples for one row from each query */
+ Tuplestorestate **old_tuplestores; /* all old tuples from each query */
+ Tuplestorestate **new_tuplestores; /* all new tuples from each query */
int maxquerydepth; /* allocated len of above array */
MemoryContext event_cxt; /* memory context for events, if any */
@@ -3222,14 +3410,16 @@ static SetConstraintState SetConstraintStateAddItem(SetConstraintState state,
/*
- * Gets the current query fdw tuplestore and initializes it if necessary
+ * Gets a current query transition tuplestore and initializes it if necessary.
+ * This can be holding a single transition row tuple (in the case of an FDW)
+ * or a transition table (for an AFTER trigger).
*/
static Tuplestorestate *
-GetCurrentFDWTuplestore(void)
+GetTriggerTransitionTuplestore(Tuplestorestate **tss)
{
Tuplestorestate *ret;
- ret = afterTriggers.fdw_tuplestores[afterTriggers.query_depth];
+ ret = tss[afterTriggers.query_depth];
if (ret == NULL)
{
MemoryContext oldcxt;
@@ -3256,7 +3446,7 @@ GetCurrentFDWTuplestore(void)
CurrentResourceOwner = saveResourceOwner;
MemoryContextSwitchTo(oldcxt);
- afterTriggers.fdw_tuplestores[afterTriggers.query_depth] = ret;
+ tss[afterTriggers.query_depth] = ret;
}
return ret;
@@ -3554,7 +3744,9 @@ AfterTriggerExecute(AfterTriggerEvent event,
{
case AFTER_TRIGGER_FDW_FETCH:
{
- Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();
+ Tuplestorestate *fdw_tuplestore =
+ GetTriggerTransitionTuplestore
+ (afterTriggers.fdw_tuplestores);
if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
trig_tuple_slot1))
@@ -3623,6 +3815,20 @@ AfterTriggerExecute(AfterTriggerEvent event,
}
}
+ /*
+ * Set up the tuplestore information.
+ */
+ if (LocTriggerData.tg_trigger->tgoldtable)
+ LocTriggerData.tg_oldtable =
+ GetTriggerTransitionTuplestore(afterTriggers.old_tuplestores);
+ else
+ LocTriggerData.tg_oldtable = NULL;
+ if (LocTriggerData.tg_trigger->tgnewtable)
+ LocTriggerData.tg_newtable =
+ GetTriggerTransitionTuplestore(afterTriggers.new_tuplestores);
+ else
+ LocTriggerData.tg_newtable = NULL;
+
/*
* Setup the remaining trigger information
*/
@@ -3912,6 +4118,8 @@ AfterTriggerBeginXact(void)
Assert(afterTriggers.state == NULL);
Assert(afterTriggers.query_stack == NULL);
Assert(afterTriggers.fdw_tuplestores == NULL);
+ Assert(afterTriggers.old_tuplestores == NULL);
+ Assert(afterTriggers.new_tuplestores == NULL);
Assert(afterTriggers.maxquerydepth == 0);
Assert(afterTriggers.event_cxt == NULL);
Assert(afterTriggers.events.head == NULL);
@@ -3956,6 +4164,8 @@ AfterTriggerEndQuery(EState *estate)
{
AfterTriggerEventList *events;
Tuplestorestate *fdw_tuplestore;
+ Tuplestorestate *old_tuplestore;
+ Tuplestorestate *new_tuplestore;
/* Must be inside a query, too */
Assert(afterTriggers.query_depth >= 0);
@@ -4014,6 +4224,18 @@ AfterTriggerEndQuery(EState *estate)
tuplestore_end(fdw_tuplestore);
afterTriggers.fdw_tuplestores[afterTriggers.query_depth] = NULL;
}
+ old_tuplestore = afterTriggers.old_tuplestores[afterTriggers.query_depth];
+ if (old_tuplestore)
+ {
+ tuplestore_end(old_tuplestore);
+ afterTriggers.old_tuplestores[afterTriggers.query_depth] = NULL;
+ }
+ new_tuplestore = afterTriggers.new_tuplestores[afterTriggers.query_depth];
+ if (new_tuplestore)
+ {
+ tuplestore_end(new_tuplestore);
+ afterTriggers.new_tuplestores[afterTriggers.query_depth] = NULL;
+ }
afterTriggerFreeEventList(&afterTriggers.query_stack[afterTriggers.query_depth]);
afterTriggers.query_depth--;
@@ -4127,6 +4349,8 @@ AfterTriggerEndXact(bool isCommit)
*/
afterTriggers.query_stack = NULL;
afterTriggers.fdw_tuplestores = NULL;
+ afterTriggers.old_tuplestores = NULL;
+ afterTriggers.new_tuplestores = NULL;
afterTriggers.maxquerydepth = 0;
afterTriggers.state = NULL;
@@ -4259,6 +4483,18 @@ AfterTriggerEndSubXact(bool isCommit)
tuplestore_end(ts);
afterTriggers.fdw_tuplestores[afterTriggers.query_depth] = NULL;
}
+ ts = afterTriggers.old_tuplestores[afterTriggers.query_depth];
+ if (ts)
+ {
+ tuplestore_end(ts);
+ afterTriggers.old_tuplestores[afterTriggers.query_depth] = NULL;
+ }
+ ts = afterTriggers.new_tuplestores[afterTriggers.query_depth];
+ if (ts)
+ {
+ tuplestore_end(ts);
+ afterTriggers.new_tuplestores[afterTriggers.query_depth] = NULL;
+ }
afterTriggerFreeEventList(&afterTriggers.query_stack[afterTriggers.query_depth]);
}
@@ -4338,6 +4574,12 @@ AfterTriggerEnlargeQueryState(void)
afterTriggers.fdw_tuplestores = (Tuplestorestate **)
MemoryContextAllocZero(TopTransactionContext,
new_alloc * sizeof(Tuplestorestate *));
+ afterTriggers.old_tuplestores = (Tuplestorestate **)
+ MemoryContextAllocZero(TopTransactionContext,
+ new_alloc * sizeof(Tuplestorestate *));
+ afterTriggers.new_tuplestores = (Tuplestorestate **)
+ MemoryContextAllocZero(TopTransactionContext,
+ new_alloc * sizeof(Tuplestorestate *));
afterTriggers.maxquerydepth = new_alloc;
}
else
@@ -4353,9 +4595,19 @@ AfterTriggerEnlargeQueryState(void)
afterTriggers.fdw_tuplestores = (Tuplestorestate **)
repalloc(afterTriggers.fdw_tuplestores,
new_alloc * sizeof(Tuplestorestate *));
+ afterTriggers.old_tuplestores = (Tuplestorestate **)
+ repalloc(afterTriggers.old_tuplestores,
+ new_alloc * sizeof(Tuplestorestate *));
+ afterTriggers.new_tuplestores = (Tuplestorestate **)
+ repalloc(afterTriggers.new_tuplestores,
+ new_alloc * sizeof(Tuplestorestate *));
/* Clear newly-allocated slots for subsequent lazy initialization. */
memset(afterTriggers.fdw_tuplestores + old_alloc,
0, (new_alloc - old_alloc) * sizeof(Tuplestorestate *));
+ memset(afterTriggers.old_tuplestores + old_alloc,
+ 0, (new_alloc - old_alloc) * sizeof(Tuplestorestate *));
+ memset(afterTriggers.new_tuplestores + old_alloc,
+ 0, (new_alloc - old_alloc) * sizeof(Tuplestorestate *));
afterTriggers.maxquerydepth = new_alloc;
}
@@ -4800,7 +5052,14 @@ AfterTriggerPendingOnRel(Oid relid)
*
* NOTE: this is called whenever there are any triggers associated with
* the event (even if they are disabled). This function decides which
- * triggers actually need to be queued.
+ * triggers actually need to be queued. It is also called after each row,
+ * even if there are no triggers for that event, if there are any AFTER
+ * STATEMENT triggers for the statement which use transition tables, so that
+ * the transition tuplestores can be built.
+ *
+ * Transition tuplestores are built now, rather than when events are pulled
+ * off of the queue, because AFTER ROW triggers are allowed to select from
+ * the transition tables for the statement.
* ----------
*/
static void
@@ -4831,6 +5090,46 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
AfterTriggerEnlargeQueryState();
+ /*
+ * If the relation has AFTER ... FOR EACH ROW triggers, capture rows into
+ * transition tuplestores for this depth.
+ */
+ if (row_trigger)
+ {
+ if ((event == TRIGGER_EVENT_DELETE &&
+ trigdesc->trig_delete_old_table) ||
+ (event == TRIGGER_EVENT_UPDATE &&
+ trigdesc->trig_update_old_table))
+ {
+ Tuplestorestate *old_tuplestore;
+
+ Assert(oldtup != NULL);
+ old_tuplestore =
+ GetTriggerTransitionTuplestore
+ (afterTriggers.old_tuplestores);
+ tuplestore_puttuple(old_tuplestore, oldtup);
+ }
+ if ((event == TRIGGER_EVENT_INSERT &&
+ trigdesc->trig_insert_new_table) ||
+ (event == TRIGGER_EVENT_UPDATE &&
+ trigdesc->trig_update_new_table))
+ {
+ Tuplestorestate *new_tuplestore;
+
+ Assert(newtup != NULL);
+ new_tuplestore =
+ GetTriggerTransitionTuplestore
+ (afterTriggers.new_tuplestores);
+ tuplestore_puttuple(new_tuplestore, newtup);
+ }
+
+ /* If transition tables are the only reason we're here, return. */
+ if ((event == TRIGGER_EVENT_DELETE && !trigdesc->trig_delete_after_row) ||
+ (event == TRIGGER_EVENT_INSERT && !trigdesc->trig_insert_after_row) ||
+ (event == TRIGGER_EVENT_UPDATE && !trigdesc->trig_update_after_row))
+ return;
+ }
+
/*
* Validate the event code and collect the associated tuple CTIDs.
*
@@ -4928,7 +5227,9 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
{
if (fdw_tuplestore == NULL)
{
- fdw_tuplestore = GetCurrentFDWTuplestore();
+ fdw_tuplestore =
+ GetTriggerTransitionTuplestore
+ (afterTriggers.fdw_tuplestores);
new_event.ate_flags = AFTER_TRIGGER_FDW_FETCH;
}
else
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 71714bc1d6..04e49b7795 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2718,6 +2718,18 @@ _copyRoleSpec(const RoleSpec *from)
return newnode;
}
+static TriggerTransition *
+_copyTriggerTransition(const TriggerTransition *from)
+{
+ TriggerTransition *newnode = makeNode(TriggerTransition);
+
+ COPY_STRING_FIELD(name);
+ COPY_SCALAR_FIELD(isNew);
+ COPY_SCALAR_FIELD(isTable);
+
+ return newnode;
+}
+
static Query *
_copyQuery(const Query *from)
{
@@ -3893,6 +3905,7 @@ _copyCreateTrigStmt(const CreateTrigStmt *from)
COPY_NODE_FIELD(columns);
COPY_NODE_FIELD(whenClause);
COPY_SCALAR_FIELD(isconstraint);
+ COPY_NODE_FIELD(transitionRels);
COPY_SCALAR_FIELD(deferrable);
COPY_SCALAR_FIELD(initdeferred);
COPY_NODE_FIELD(constrrel);
@@ -5088,6 +5101,9 @@ copyObject(const void *from)
case T_RoleSpec:
retval = _copyRoleSpec(from);
break;
+ case T_TriggerTransition:
+ retval = _copyTriggerTransition(from);
+ break;
/*
* MISCELLANEOUS NODES
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 29a090fc48..2eaf41c37f 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1905,6 +1905,7 @@ _equalCreateTrigStmt(const CreateTrigStmt *a, const CreateTrigStmt *b)
COMPARE_NODE_FIELD(columns);
COMPARE_NODE_FIELD(whenClause);
COMPARE_SCALAR_FIELD(isconstraint);
+ COMPARE_NODE_FIELD(transitionRels);
COMPARE_SCALAR_FIELD(deferrable);
COMPARE_SCALAR_FIELD(initdeferred);
COMPARE_NODE_FIELD(constrrel);
@@ -2634,6 +2635,16 @@ _equalRoleSpec(const RoleSpec *a, const RoleSpec *b)
return true;
}
+static bool
+_equalTriggerTransition(const TriggerTransition *a, const TriggerTransition *b)
+{
+ COMPARE_STRING_FIELD(name);
+ COMPARE_SCALAR_FIELD(isNew);
+ COMPARE_SCALAR_FIELD(isTable);
+
+ return true;
+}
+
/*
* Stuff from pg_list.h
*/
@@ -3387,6 +3398,9 @@ equal(const void *a, const void *b)
case T_RoleSpec:
retval = _equalRoleSpec(a, b);
break;
+ case T_TriggerTransition:
+ retval = _equalTriggerTransition(a, b);
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ae869547f3..748b687929 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2561,6 +2561,16 @@ _outXmlSerialize(StringInfo str, const XmlSerialize *node)
WRITE_LOCATION_FIELD(location);
}
+static void
+_outTriggerTransition(StringInfo str, const TriggerTransition *node)
+{
+ WRITE_NODE_TYPE("TRIGGERTRANSITION");
+
+ WRITE_STRING_FIELD(name);
+ WRITE_BOOL_FIELD(isNew);
+ WRITE_BOOL_FIELD(isTable);
+}
+
static void
_outColumnDef(StringInfo str, const ColumnDef *node)
{
@@ -3852,6 +3862,9 @@ outNode(StringInfo str, const void *obj)
case T_ForeignKeyCacheInfo:
_outForeignKeyCacheInfo(str, obj);
break;
+ case T_TriggerTransition:
+ _outTriggerTransition(str, obj);
+ break;
default:
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 5547fc8658..0ec1cd345b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -310,6 +310,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type TriggerEvents TriggerOneEvent
%type TriggerFuncArg
%type TriggerWhen
+%type TransitionRelName
+%type TransitionRowOrTable TransitionOldOrNew
+%type TriggerTransition
%type event_trigger_when_list event_trigger_value_list
%type event_trigger_when_item
@@ -374,6 +377,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
create_generic_options alter_generic_options
relation_expr_list dostmt_opt_list
transform_element_list transform_type_list
+ TriggerTransitions TriggerReferencing
%type group_by_list
%type group_by_item empty_grouping_set rollup_clause cube_clause
@@ -610,11 +614,11 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
MAPPING MATCH MATERIALIZED MAXVALUE METHOD MINUTE_P MINVALUE MODE MONTH_P MOVE
- NAME_P NAMES NATIONAL NATURAL NCHAR NEXT NO NONE
+ NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NONE
NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF
NULLS_P NUMERIC
- OBJECT_P OF OFF OFFSET OIDS ON ONLY OPERATOR OPTION OPTIONS OR
+ OBJECT_P OF OFF OFFSET OIDS OLD ON ONLY OPERATOR OPTION OPTIONS OR
ORDER ORDINALITY OUT_P OUTER_P OVER OVERLAPS OVERLAY OWNED OWNER
PARALLEL PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POLICY
@@ -623,8 +627,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
QUOTE
- RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REFRESH REINDEX
- RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA
+ RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REFERENCING
+ REFRESH REINDEX RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA
RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP
ROW ROWS RULE
@@ -4748,19 +4752,20 @@ CreateAmStmt: CREATE ACCESS METHOD name TYPE_P INDEX HANDLER handler_name
CreateTrigStmt:
CREATE TRIGGER name TriggerActionTime TriggerEvents ON
- qualified_name TriggerForSpec TriggerWhen
+ qualified_name TriggerReferencing TriggerForSpec TriggerWhen
EXECUTE PROCEDURE func_name '(' TriggerFuncArgs ')'
{
CreateTrigStmt *n = makeNode(CreateTrigStmt);
n->trigname = $3;
n->relation = $7;
- n->funcname = $12;
- n->args = $14;
- n->row = $8;
+ n->funcname = $13;
+ n->args = $15;
+ n->row = $9;
n->timing = $4;
n->events = intVal(linitial($5));
n->columns = (List *) lsecond($5);
- n->whenClause = $9;
+ n->whenClause = $10;
+ n->transitionRels = $8;
n->isconstraint = FALSE;
n->deferrable = FALSE;
n->initdeferred = FALSE;
@@ -4782,6 +4787,7 @@ CreateTrigStmt:
n->events = intVal(linitial($6));
n->columns = (List *) lsecond($6);
n->whenClause = $14;
+ n->transitionRels = NIL;
n->isconstraint = TRUE;
processCASbits($10, @10, "TRIGGER",
&n->deferrable, &n->initdeferred, NULL,
@@ -4834,6 +4840,49 @@ TriggerOneEvent:
{ $$ = list_make2(makeInteger(TRIGGER_TYPE_TRUNCATE), NIL); }
;
+TriggerReferencing:
+ REFERENCING TriggerTransitions { $$ = $2; }
+ | /*EMPTY*/ { $$ = NIL; }
+ ;
+
+TriggerTransitions:
+ TriggerTransition { $$ = list_make1($1); }
+ | TriggerTransitions TriggerTransition { $$ = lappend($1, $2); }
+ ;
+
+TriggerTransition:
+ TransitionOldOrNew TransitionRowOrTable opt_as TransitionRelName
+ {
+ TriggerTransition *n = makeNode(TriggerTransition);
+ n->name = $4;
+ n->isNew = $1;
+ n->isTable = $2;
+ $$ = (Node *)n;
+ }
+ ;
+
+TransitionOldOrNew:
+ NEW { $$ = TRUE; }
+ | OLD { $$ = FALSE; }
+ ;
+
+TransitionRowOrTable:
+ TABLE { $$ = TRUE; }
+ /*
+ * According to the standard, lack of a keyword here implies ROW.
+ * Support for that would require prohibiting ROW entirely here,
+ * reserving the keyword ROW, and/or requiring AS (instead of
+ * allowing it to be optional, as the standard specifies) as the
+ * next token. Requiring ROW seems cleanest and easiest to
+ * explain.
+ */
+ | ROW { $$ = FALSE; }
+ ;
+
+TransitionRelName:
+ ColId { $$ = $1; }
+ ;
+
TriggerForSpec:
FOR TriggerForOptEach TriggerForType
{
@@ -13810,6 +13859,7 @@ unreserved_keyword:
| MOVE
| NAME_P
| NAMES
+ | NEW
| NEXT
| NO
| NOTHING
@@ -13820,6 +13870,7 @@ unreserved_keyword:
| OF
| OFF
| OIDS
+ | OLD
| OPERATOR
| OPTION
| OPTIONS
@@ -13851,6 +13902,7 @@ unreserved_keyword:
| RECHECK
| RECURSIVE
| REF
+ | REFERENCING
| REFRESH
| REINDEX
| RELATIVE_P
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 8a81d7a078..a3a4174abf 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -813,6 +813,8 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty)
SysScanDesc tgscan;
int findx = 0;
char *tgname;
+ char *tgoldtable;
+ char *tgnewtable;
Oid argtypes[1]; /* dummy */
Datum value;
bool isnull;
@@ -924,6 +926,27 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty)
appendStringInfoString(&buf, "IMMEDIATE ");
}
+ value = fastgetattr(ht_trig, Anum_pg_trigger_tgoldtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ tgoldtable = NameStr(*((NameData *) DatumGetPointer(value)));
+ else
+ tgoldtable = NULL;
+ value = fastgetattr(ht_trig, Anum_pg_trigger_tgnewtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ tgnewtable = NameStr(*((NameData *) DatumGetPointer(value)));
+ else
+ tgnewtable = NULL;
+ if (tgoldtable != NULL || tgnewtable != NULL)
+ {
+ appendStringInfoString(&buf, "REFERENCING ");
+ if (tgoldtable != NULL)
+ appendStringInfo(&buf, "OLD TABLE AS %s ", tgoldtable);
+ if (tgnewtable != NULL)
+ appendStringInfo(&buf, "NEW TABLE AS %s ", tgnewtable);
+ }
+
if (TRIGGER_FOR_ROW(trigrec->tgtype))
appendStringInfoString(&buf, "FOR EACH ROW ");
else
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index cd3048db86..880559650a 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201610201
+#define CATALOG_VERSION_NO 201611041
#endif
diff --git a/src/include/catalog/pg_trigger.h b/src/include/catalog/pg_trigger.h
index eb39c50e63..da6a7f3a2e 100644
--- a/src/include/catalog/pg_trigger.h
+++ b/src/include/catalog/pg_trigger.h
@@ -59,6 +59,8 @@ CATALOG(pg_trigger,2620)
#ifdef CATALOG_VARLEN
bytea tgargs BKI_FORCE_NOT_NULL; /* first\000second\000tgnargs\000 */
pg_node_tree tgqual; /* WHEN expression, or NULL if none */
+ NameData tgoldtable; /* old transition table, or NULL if none */
+ NameData tgnewtable; /* new transition table, or NULL if none */
#endif
} FormData_pg_trigger;
@@ -73,7 +75,7 @@ typedef FormData_pg_trigger *Form_pg_trigger;
* compiler constants for pg_trigger
* ----------------
*/
-#define Natts_pg_trigger 15
+#define Natts_pg_trigger 17
#define Anum_pg_trigger_tgrelid 1
#define Anum_pg_trigger_tgname 2
#define Anum_pg_trigger_tgfoid 3
@@ -89,6 +91,8 @@ typedef FormData_pg_trigger *Form_pg_trigger;
#define Anum_pg_trigger_tgattr 13
#define Anum_pg_trigger_tgargs 14
#define Anum_pg_trigger_tgqual 15
+#define Anum_pg_trigger_tgoldtable 16
+#define Anum_pg_trigger_tgnewtable 17
/* Bits within tgtype */
#define TRIGGER_TYPE_ROW (1 << 0)
@@ -142,4 +146,11 @@ typedef FormData_pg_trigger *Form_pg_trigger;
#define TRIGGER_TYPE_MATCHES(type, level, timing, event) \
(((type) & (TRIGGER_TYPE_LEVEL_MASK | TRIGGER_TYPE_TIMING_MASK | (event))) == ((level) | (timing) | (event)))
+/*
+ * Macro to determine whether tgnewtable or tgoldtable has been specified for
+ * a trigger.
+ */
+#define TRIGGER_USES_TRANSITION_TABLE(namepointer) \
+ ((namepointer) != (char *) NULL)
+
#endif /* PG_TRIGGER_H */
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index 0ed7c86eb2..c6e3e2c234 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -37,6 +37,8 @@ typedef struct TriggerData
Trigger *tg_trigger;
Buffer tg_trigtuplebuf;
Buffer tg_newtuplebuf;
+ Tuplestorestate *tg_oldtable;
+ Tuplestorestate *tg_newtable;
} TriggerData;
/*
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 88297bbe80..cb9307cd00 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -453,6 +453,7 @@ typedef enum NodeTag
T_OnConflictClause,
T_CommonTableExpr,
T_RoleSpec,
+ T_TriggerTransition,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 6de2cab6b2..9b600a5f76 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1204,6 +1204,21 @@ typedef struct CommonTableExpr
((Query *) (cte)->ctequery)->targetList : \
((Query *) (cte)->ctequery)->returningList)
+/*
+ * TriggerTransition -
+ * representation of transition row or table naming clause
+ *
+ * Only transition tables are initially supported in the syntax, and only for
+ * AFTER triggers, but other permutations are accepted by the parser so we can
+ * give a meaningful message from C code.
+ */
+typedef struct TriggerTransition
+{
+ NodeTag type;
+ char *name;
+ bool isNew;
+ bool isTable;
+} TriggerTransition;
/*****************************************************************************
* Optimizable Statements
@@ -2105,6 +2120,8 @@ typedef struct CreateTrigStmt
List *columns; /* column names, or NIL for all columns */
Node *whenClause; /* qual expression, or NULL if none */
bool isconstraint; /* This is a constraint trigger */
+ /* explicitly named transition data */
+ List *transitionRels; /* TriggerTransition nodes, or NIL if none */
/* The remaining fields are only used for constraint triggers */
bool deferrable; /* [NOT] DEFERRABLE */
bool initdeferred; /* INITIALLY {DEFERRED|IMMEDIATE} */
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 17ffef53a7..77d873beca 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -251,6 +251,7 @@ PG_KEYWORD("names", NAMES, UNRESERVED_KEYWORD)
PG_KEYWORD("national", NATIONAL, COL_NAME_KEYWORD)
PG_KEYWORD("natural", NATURAL, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("nchar", NCHAR, COL_NAME_KEYWORD)
+PG_KEYWORD("new", NEW, UNRESERVED_KEYWORD)
PG_KEYWORD("next", NEXT, UNRESERVED_KEYWORD)
PG_KEYWORD("no", NO, UNRESERVED_KEYWORD)
PG_KEYWORD("none", NONE, COL_NAME_KEYWORD)
@@ -268,6 +269,7 @@ PG_KEYWORD("of", OF, UNRESERVED_KEYWORD)
PG_KEYWORD("off", OFF, UNRESERVED_KEYWORD)
PG_KEYWORD("offset", OFFSET, RESERVED_KEYWORD)
PG_KEYWORD("oids", OIDS, UNRESERVED_KEYWORD)
+PG_KEYWORD("old", OLD, UNRESERVED_KEYWORD)
PG_KEYWORD("on", ON, RESERVED_KEYWORD)
PG_KEYWORD("only", ONLY, RESERVED_KEYWORD)
PG_KEYWORD("operator", OPERATOR, UNRESERVED_KEYWORD)
@@ -313,6 +315,7 @@ PG_KEYWORD("recheck", RECHECK, UNRESERVED_KEYWORD)
PG_KEYWORD("recursive", RECURSIVE, UNRESERVED_KEYWORD)
PG_KEYWORD("ref", REF, UNRESERVED_KEYWORD)
PG_KEYWORD("references", REFERENCES, RESERVED_KEYWORD)
+PG_KEYWORD("referencing", REFERENCING, UNRESERVED_KEYWORD)
PG_KEYWORD("refresh", REFRESH, UNRESERVED_KEYWORD)
PG_KEYWORD("reindex", REINDEX, UNRESERVED_KEYWORD)
PG_KEYWORD("relative", RELATIVE_P, UNRESERVED_KEYWORD)
diff --git a/src/include/utils/reltrigger.h b/src/include/utils/reltrigger.h
index e87f2283ec..756b417128 100644
--- a/src/include/utils/reltrigger.h
+++ b/src/include/utils/reltrigger.h
@@ -39,6 +39,8 @@ typedef struct Trigger
int16 *tgattr;
char **tgargs;
char *tgqual;
+ char *tgoldtable;
+ char *tgnewtable;
} Trigger;
typedef struct TriggerDesc
@@ -68,6 +70,11 @@ typedef struct TriggerDesc
/* there are no row-level truncate triggers */
bool trig_truncate_before_statement;
bool trig_truncate_after_statement;
+ /* Is there at least one trigger specifying each transition relation? */
+ bool trig_insert_new_table;
+ bool trig_update_old_table;
+ bool trig_update_new_table;
+ bool trig_delete_old_table;
} TriggerDesc;
#endif /* RELTRIGGER_H */
--
cgit v1.2.3
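For reference, the REFERENCING clause accepted by the new grammar above can be
written as follows (a minimal sketch; the table, transition-table, and trigger
function names are illustrative, not taken from the patch). At this stage the
captured rows are exposed to C trigger functions through the new
tg_oldtable/tg_newtable tuplestore fields added to TriggerData:

    -- Hypothetical example of the syntax this commit adds; the trigger
    -- function accounts_audit_func() is assumed to already exist.
    CREATE TRIGGER accounts_audit
        AFTER UPDATE ON accounts
        REFERENCING OLD TABLE AS before_rows NEW TABLE AS after_rows
        FOR EACH STATEMENT
        EXECUTE PROCEDURE accounts_audit_func();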
From 20540710e83f2873707c284a0c0693f0b57156c4 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 4 Nov 2016 18:29:53 -0400
Subject: Delete contrib/xml2's legacy implementation of xml_is_well_formed().
This function is unreferenced in modern usage; it was superseded in 9.1
by a core function of the same name. It has been left in place in the C
code only so that pre-9.1 SQL definitions of the contrib/xml2 functions
would continue to work. Six years seems like enough time for people to
have updated to the extension-style version of the xml2 module, so let's
drop this.
The key reason for not keeping it any longer is that we want to stick
an explicit PGDLLEXPORT into PG_FUNCTION_INFO_V1(), and the similarity
of name to the core function creates a conflict that compilers will
complain about.
Extracted from a larger patch for that purpose. I'm committing this
change separately to give it more visibility in the commit logs.
While at it, remove the documentation entry that claimed that
xml_is_well_formed() is a function provided by contrib/xml2, and
instead mention the even more ancient alias xml_valid().
Laurenz Albe, doc change by me
Patch:
---
contrib/xml2/xpath.c | 45 ---------------------------------------------
doc/src/sgml/xml2.sgml | 10 +++++-----
2 files changed, 5 insertions(+), 50 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c
index ac28996867..28445be369 100644
--- a/contrib/xml2/xpath.c
+++ b/contrib/xml2/xpath.c
@@ -81,51 +81,6 @@ pgxml_parser_init(PgXmlStrictness strictness)
}
-/*
- * Returns true if document is well-formed
- *
- * Note: this has been superseded by a core function. We still have to
- * have it in the contrib module so that existing SQL-level references
- * to the function won't fail; but in normal usage with up-to-date SQL
- * definitions for the contrib module, this won't be called.
- */
-
-PG_FUNCTION_INFO_V1(xml_is_well_formed);
-
-Datum
-xml_is_well_formed(PG_FUNCTION_ARGS)
-{
- text *t = PG_GETARG_TEXT_P(0); /* document buffer */
- bool result = false;
- int32 docsize = VARSIZE(t) - VARHDRSZ;
- xmlDocPtr doctree;
- PgXmlErrorContext *xmlerrcxt;
-
- xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
-
- PG_TRY();
- {
- doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-
- result = (doctree != NULL);
-
- if (doctree != NULL)
- xmlFreeDoc(doctree);
- }
- PG_CATCH();
- {
- pg_xml_done(xmlerrcxt, true);
-
- PG_RE_THROW();
- }
- PG_END_TRY();
-
- pg_xml_done(xmlerrcxt, false);
-
- PG_RETURN_BOOL(result);
-}
-
-
/* Encodes special characters (<, >, &, " and \r) as XML entities */
PG_FUNCTION_INFO_V1(xml_encode_special_chars);
diff --git a/doc/src/sgml/xml2.sgml b/doc/src/sgml/xml2.sgml
index a40172c36d..9bbc9e75d7 100644
--- a/doc/src/sgml/xml2.sgml
+++ b/doc/src/sgml/xml2.sgml
@@ -53,7 +53,7 @@
- xml_is_well_formed(document)
+ xml_valid(document)
@@ -62,10 +62,10 @@
This parses the document text in its parameter and returns true if the
- document is well-formed XML. (Note: before PostgreSQL 8.2, this
- function was called xml_valid()>. That is the wrong name
- since validity and well-formedness have different meanings in XML.
- The old name is still available, but is deprecated.)
+ document is well-formed XML. (Note: this is an alias for the standard
+ PostgreSQL function xml_is_well_formed()>. The
+ name xml_valid()> is technically incorrect since validity
+ and well-formedness have different meanings in XML.)
--
cgit v1.2.3
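A quick illustration of the relationship the updated documentation describes,
assuming the xml2 extension is installed (the input strings are arbitrary):

    -- xml_is_well_formed() is the core function; per the doc change above,
    -- contrib/xml2's xml_valid() is a deprecated alias for it.
    SELECT xml_is_well_formed('<doc><title>ok</title></doc>');  -- true
    SELECT xml_valid('<doc><title>oops</doc>');                 -- false, tag mismatch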
From c8ead2a3974d3eada145a0e18940150039493cc9 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 4 Nov 2016 19:04:56 -0400
Subject: Provide DLLEXPORT markers for C functions via PG_FUNCTION_INFO_V1
macro.
Second try at the change originally made in commit 8518583cd;
this time with contrib updates so that manual extern declarations
are also marked with PGDLLEXPORT. The release notes should point
this out as a significant source-code change for extension authors,
since they'll have to make similar additions to avoid trouble on Windows.
Laurenz Albe, doc change by me
Patch:
---
contrib/hstore/hstore.h | 2 +-
contrib/ltree/ltree.h | 40 ++++++++++++++++++++--------------------
doc/src/sgml/xfunc.sgml | 17 +++++++++++++++++
src/include/fmgr.h | 7 +++----
4 files changed, 41 insertions(+), 25 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/hstore/hstore.h b/contrib/hstore/hstore.h
index 6bab08b7de..6303fa4061 100644
--- a/contrib/hstore/hstore.h
+++ b/contrib/hstore/hstore.h
@@ -194,7 +194,7 @@ extern Pairs *hstoreArrayToPairs(ArrayType *a, int *npairs);
#if HSTORE_POLLUTE_NAMESPACE
#define HSTORE_POLLUTE(newname_,oldname_) \
PG_FUNCTION_INFO_V1(oldname_); \
- Datum newname_(PG_FUNCTION_ARGS); \
+ extern PGDLLEXPORT Datum newname_(PG_FUNCTION_ARGS); \
Datum oldname_(PG_FUNCTION_ARGS) { return newname_(fcinfo); } \
extern int no_such_variable
#else
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index c604357dbf..c7aa7f8818 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -130,30 +130,30 @@ typedef struct
/* use in array iterator */
-Datum ltree_isparent(PG_FUNCTION_ARGS);
-Datum ltree_risparent(PG_FUNCTION_ARGS);
-Datum ltq_regex(PG_FUNCTION_ARGS);
-Datum ltq_rregex(PG_FUNCTION_ARGS);
-Datum lt_q_regex(PG_FUNCTION_ARGS);
-Datum lt_q_rregex(PG_FUNCTION_ARGS);
-Datum ltxtq_exec(PG_FUNCTION_ARGS);
-Datum ltxtq_rexec(PG_FUNCTION_ARGS);
-Datum _ltq_regex(PG_FUNCTION_ARGS);
-Datum _ltq_rregex(PG_FUNCTION_ARGS);
-Datum _lt_q_regex(PG_FUNCTION_ARGS);
-Datum _lt_q_rregex(PG_FUNCTION_ARGS);
-Datum _ltxtq_exec(PG_FUNCTION_ARGS);
-Datum _ltxtq_rexec(PG_FUNCTION_ARGS);
-Datum _ltree_isparent(PG_FUNCTION_ARGS);
-Datum _ltree_risparent(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltree_isparent(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltree_risparent(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltq_regex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltq_rregex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum lt_q_regex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum lt_q_rregex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltxtq_exec(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltxtq_rexec(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _ltq_regex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _ltq_rregex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _lt_q_regex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _lt_q_rregex(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _ltxtq_exec(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _ltxtq_rexec(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _ltree_isparent(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum _ltree_risparent(PG_FUNCTION_ARGS);
/* Concatenation functions */
-Datum ltree_addltree(PG_FUNCTION_ARGS);
-Datum ltree_addtext(PG_FUNCTION_ARGS);
-Datum ltree_textadd(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltree_addltree(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltree_addtext(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltree_textadd(PG_FUNCTION_ARGS);
/* Util function */
-Datum ltree_in(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum ltree_in(PG_FUNCTION_ARGS);
bool ltree_execute(ITEM *curitem, void *checkval,
bool calcnot, bool (*chkcond) (void *checkval, ITEM *val));
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index de6a466efc..6060e61857 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -2577,6 +2577,23 @@ concat_text(PG_FUNCTION_ARGS)
error messages to this effect.
+
+
+
+ To work correctly on Windows, C>-language functions need
+ to be marked with PGDLLEXPORT>, unless you use a build
+ process that marks all global functions that way. In simple cases
+ this detail will be handled transparently by
+ the PG_FUNCTION_INFO_V1> macro. However, if you write
+ explicit external declarations (perhaps in header files), be sure
+ to write them like this:
+
+extern PGDLLEXPORT Datum funcname(PG_FUNCTION_ARGS);
+
+ or you'll get compiler complaints when building on Windows. (On
+ other platforms, the PGDLLEXPORT> macro does nothing.)
+
+
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index 0878418516..3668ac3f6e 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -350,12 +350,11 @@ typedef const Pg_finfo_record *(*PGFInfoFunction) (void);
*
* On Windows, the function and info function must be exported. Our normal
* build processes take care of that via .DEF files or --export-all-symbols.
- * Module authors using a different build process might need to manually
- * declare the function PGDLLEXPORT. We do that automatically here for the
- * info function, since authors shouldn't need to be explicitly aware of it.
+ * Module authors using a different build process might do it differently,
+ * so we declare these functions PGDLLEXPORT for their convenience.
*/
#define PG_FUNCTION_INFO_V1(funcname) \
-extern Datum funcname(PG_FUNCTION_ARGS); \
+extern PGDLLEXPORT Datum funcname(PG_FUNCTION_ARGS); \
extern PGDLLEXPORT const Pg_finfo_record * CppConcat(pg_finfo_,funcname)(void); \
const Pg_finfo_record * \
CppConcat(pg_finfo_,funcname) (void) \
--
cgit v1.2.3
From 6feb69f6cef8b1bd2829700e25e402f22e86f3bd Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 4 Nov 2016 12:00:00 -0400
Subject: doc: Port page header customizations to XSLT
---
doc/src/sgml/stylesheet.xsl | 138 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 138 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/stylesheet.xsl b/doc/src/sgml/stylesheet.xsl
index 39c9df28ad..42e8cce368 100644
--- a/doc/src/sgml/stylesheet.xsl
+++ b/doc/src/sgml/stylesheet.xsl
@@ -246,4 +246,142 @@ set toc,title
+ [138 lines of added XSLT header/footer templates; the XML markup was lost in extraction]
--
cgit v1.2.3
From d49cc588ca589cd378b5862fa5704eaade4a1380 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 4 Nov 2016 12:00:00 -0400
Subject: doc: Don't reformat .fo files before processing by fop
This messes up the whitespace in the output PDF document in some places.
---
doc/src/sgml/Makefile | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 5df2f04dd6..84c94e8ae0 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -270,20 +270,16 @@ htmlhelp: stylesheet-hh.xsl postgres.xml
$(XMLLINT) --noout --valid postgres.xml
$(XSLTPROC) $(XSLTPROCFLAGS) $^
-%-A4.fo.tmp: stylesheet-fo.xsl %.xml
+%-A4.fo: stylesheet-fo.xsl %.xml
$(XMLLINT) --noout --valid $*.xml
$(XSLTPROC) $(XSLTPROCFLAGS) --stringparam paper.type A4 -o $@ $^
-%-US.fo.tmp: stylesheet-fo.xsl %.xml
+%-US.fo: stylesheet-fo.xsl %.xml
$(XMLLINT) --noout --valid $*.xml
$(XSLTPROC) $(XSLTPROCFLAGS) --stringparam paper.type USletter -o $@ $^
FOP = fop
-# reformat FO output so that locations of errors are easier to find
-%.fo: %.fo.tmp
- $(XMLLINT) --format --output $@ $^
-
.SECONDARY: postgres-A4.fo postgres-US.fo
%-fop.pdf: %.fo
@@ -404,7 +400,7 @@ clean:
# index
rm -f HTML.index $(GENERATED_SGML)
# XSLT
- rm -f postgres.xml postgres.xmltmp htmlhelp.hhp toc.hhc index.hhk *.fo *.fo.tmp
+ rm -f postgres.xml postgres.xmltmp htmlhelp.hhp toc.hhc index.hhk *.fo
# EPUB
rm -f postgres.epub
# Texinfo
--
cgit v1.2.3
From fd2664dcb71102a5d66d2453182c010fb219496c Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 6 Nov 2016 14:43:13 -0500
Subject: Rationalize and document pltcl's handling of magic ".tupno" array
element.
For a very long time, pltcl's spi_exec and spi_execp commands have had
a behavior of storing the current row number as an element of output
arrays, but this was never documented. Fix that.
For an equally long time, pltcl_trigger_handler had a behavior of silently
ignoring ".tupno" as an output column name, evidently so that the result
of spi_exec could be used directly as a trigger result tuple. Not sure
how useful that really is, but in any case it's bad that it would break
attempts to use ".tupno" as an actual column name. We can fix it by not
checking for ".tupno" until after we check for a column name match. This
comports with the effective behavior of spi_exec[p] that ".tupno" is only
magic when you don't have an actual column named that.
In passing, wordsmith the description of returning modified tuples from
a pltcl trigger.
Noted while working on Jim Nasby's patch to support composite results
from pltcl. The inability to return trigger tuples using ".tupno" as
a column name is a bug, so back-patch to all supported branches.
---
doc/src/sgml/pltcl.sgml | 54 ++++++++++++++++++++++++++++++++-----------------
src/pl/tcl/pltcl.c | 23 +++++++++++++--------
2 files changed, 50 insertions(+), 27 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/pltcl.sgml b/doc/src/sgml/pltcl.sgml
index 805cc89dc9..52fc44940c 100644
--- a/doc/src/sgml/pltcl.sgml
+++ b/doc/src/sgml/pltcl.sgml
@@ -296,20 +296,22 @@ $$ LANGUAGE pltcl;
If the command is a SELECT> statement, the values of the
result columns are placed into Tcl variables named after the columns.
If the -array> option is given, the column values are
- instead stored into the named associative array, with the
- column names used as array indexes.
+ instead stored into elements of the named associative array, with the
+ column names used as array indexes. In addition, the current row
+ number within the result (counting from zero) is stored into the array
+ element named .tupno>, unless that name is
+ in use as a column name in the result.
If the command is a SELECT> statement and no loop-body>
script is given, then only the first row of results are stored into
- Tcl variables; remaining rows, if any, are ignored. No storing occurs
- if the
- query returns no rows. (This case can be detected by checking the
- result of spi_exec.) For example:
+ Tcl variables or array elements; remaining rows, if any, are ignored.
+ No storing occurs if the query returns no rows. (This case can be
+ detected by checking the result of spi_exec.)
+ For example:
spi_exec "SELECT count(*) AS cnt FROM pg_proc"
-
will set the Tcl variable $cnt> to the number of rows in
the pg_proc> system catalog.
@@ -317,15 +319,15 @@ spi_exec "SELECT count(*) AS cnt FROM pg_proc"
If the optional loop-body> argument is given, it is
a piece of Tcl script that is executed once for each row in the
query result. (loop-body> is ignored if the given
- command is not a SELECT>.) The values of the current row's columns
- are stored into Tcl variables before each iteration. For example:
-
+ command is not a SELECT>.)
+ The values of the current row's columns
+ are stored into Tcl variables or array elements before each iteration.
+ For example:
spi_exec -array C "SELECT * FROM pg_class" {
elog DEBUG "have table $C(relname)"
}
-
will print a log message for every row of pg_class>. This
feature works similarly to other Tcl looping constructs; in
particular continue> and break> work in the
@@ -667,21 +669,35 @@ SELECT 'doesn''t' AS ret
The return value from a trigger procedure can be one of the strings
- OK> or SKIP>, or a list as returned by the
- array get> Tcl command. If the return value is OK>,
- the operation (INSERT>/UPDATE>/DELETE>) that fired the trigger will proceed
+ OK> or SKIP>, or a list of column name/value pairs.
+ If the return value is OK>,
+ the operation (INSERT>/UPDATE>/DELETE>)
+ that fired the trigger will proceed
normally. SKIP> tells the trigger manager to silently suppress
the operation for this row. If a list is returned, it tells PL/Tcl to
- return a modified row to the trigger manager. This is only meaningful
+ return a modified row to the trigger manager; the contents of the
+ modified row are specified by the column names and values in the list.
+ Any columns not mentioned in the list are set to null.
+ Returning a modified row is only meaningful
for row-level BEFORE> INSERT> or UPDATE>
- triggers for which the modified row will be inserted instead of the one
+ triggers, for which the modified row will be inserted instead of the one
given in $NEW>; or for row-level INSTEAD OF>
INSERT> or UPDATE> triggers where the returned row
- is used to support INSERT RETURNING> and
- UPDATE RETURNING> commands. The return value is ignored for
- other types of triggers.
+ is used as the source data for INSERT RETURNING> or
+ UPDATE RETURNING> clauses.
+ In row-level BEFORE> DELETE> or INSTEAD
+ OF> DELETE> triggers, returning a modified row has the same
+ effect as returning OK>, that is the operation proceeds.
+ The trigger return value is ignored for all other types of triggers.
+
+
+ The result list can be made from an array representation of the
+ modified tuple with the array get> Tcl command.
+
+
+
Here's a little example trigger procedure that forces an integer value
in a table to keep track of the number of updates that are performed on the
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 9d72f47f59..44fcf68054 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -1118,21 +1118,23 @@ pltcl_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted)
Oid typioparam;
FmgrInfo finfo;
- /************************************************************
- * Ignore ".tupno" pseudo elements (see pltcl_set_tuple_values)
- ************************************************************/
- if (strcmp(ret_name, ".tupno") == 0)
- continue;
-
/************************************************************
* Get the attribute number
+ *
+ * We silently ignore ".tupno", if it's present but doesn't match
+ * any actual output column. This allows direct use of a row
+ * returned by pltcl_set_tuple_values().
************************************************************/
attnum = SPI_fnumber(tupdesc, ret_name);
if (attnum == SPI_ERROR_NOATTRIBUTE)
+ {
+ if (strcmp(ret_name, ".tupno") == 0)
+ continue;
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("unrecognized attribute \"%s\"",
ret_name)));
+ }
if (attnum <= 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -2703,8 +2705,7 @@ pltcl_set_tuple_values(Tcl_Interp *interp, const char *arrayname,
const char *nullname = NULL;
/************************************************************
- * Prepare pointers for Tcl_SetVar2() below and in array
- * mode set the .tupno element
+ * Prepare pointers for Tcl_SetVar2() below
************************************************************/
if (arrayname == NULL)
{
@@ -2715,6 +2716,12 @@ pltcl_set_tuple_values(Tcl_Interp *interp, const char *arrayname,
{
arrptr = &arrayname;
nameptr = &attname;
+
+ /*
+ * When outputting to an array, fill the ".tupno" element with the
+ * current tuple number. This will be overridden below if ".tupno" is
+ * in use as an actual field name in the rowtype.
+ */
Tcl_SetVar2Ex(interp, arrayname, ".tupno", Tcl_NewWideIntObj(tupno), 0);
}
--
cgit v1.2.3
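A minimal sketch of the behavior this commit documents (the function and query
are hypothetical): in -array mode, spi_exec stores the zero-based row number in
the .tupno element, unless the result has a real column of that name:

    CREATE FUNCTION first_three_rels() RETURNS text AS $$
        set out {}
        spi_exec -array C "SELECT relname FROM pg_class ORDER BY relname LIMIT 3" {
            # C(.tupno) holds the current row number, counting from zero
            append out "$C(.tupno): $C(relname)\n"
        }
        return $out
    $$ LANGUAGE pltcl;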
From 26abb50c490dee191df21282bc940b94118550aa Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 6 Nov 2016 17:56:05 -0500
Subject: Support PL/Tcl functions that return composite types and/or sets.
Jim Nasby, rather heavily editorialized by me
Patch:
---
doc/src/sgml/pltcl.sgml | 73 ++++--
src/pl/tcl/expected/pltcl_queries.out | 61 +++++
src/pl/tcl/expected/pltcl_setup.out | 13 +
src/pl/tcl/pltcl.c | 430 +++++++++++++++++++++++++++++-----
src/pl/tcl/sql/pltcl_queries.sql | 33 +++
src/pl/tcl/sql/pltcl_setup.sql | 16 ++
6 files changed, 545 insertions(+), 81 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/pltcl.sgml b/doc/src/sgml/pltcl.sgml
index 52fc44940c..8afaf4ad36 100644
--- a/doc/src/sgml/pltcl.sgml
+++ b/doc/src/sgml/pltcl.sgml
@@ -94,11 +94,11 @@ $$ LANGUAGE pltcl;
The body of the function is simply a piece of Tcl script.
- When the function is called, the argument values are passed as
- variables $1 ... $n to the
- Tcl script. The result is returned
- from the Tcl code in the usual way, with a return
- statement.
+ When the function is called, the argument values are passed to the
+ Tcl script as variables named 1
+ ... n. The result is
+ returned from the Tcl code in the usual way, with
+ a return statement.
@@ -173,17 +173,57 @@ $$ LANGUAGE pltcl;
- There is currently no support for returning a composite-type
- result value, nor for returning sets.
+ PL/Tcl functions can return composite-type results, too. To do this,
+ the Tcl code must return a list of column name/value pairs matching
+ the expected result type. Any column names omitted from the list
+ are returned as nulls, and an error is raised if there are unexpected
+ column names. Here is an example:
+
+
+CREATE FUNCTION square_cube(in int, out squared int, out cubed int) AS $$
+ return [list squared [expr {$1 * $1}] cubed [expr {$1 * $1 * $1}]]
+$$ LANGUAGE pltcl;
+
+
+
+ The result list can be made from an array representation of the
+ desired tuple with the array get> Tcl command. For example:
+
+
+CREATE FUNCTION raise_pay(employee, delta int) RETURNS employee AS $$
+ set 1(salary) [expr {$1(salary) + $2}]
+ return [array get 1]
+$$ LANGUAGE pltcl;
+
+
+
+
- PL/Tcl> does not currently have full support for
- domain types: it treats a domain the same as the underlying scalar
- type. This means that constraints associated with the domain will
- not be enforced. This is not an issue for function arguments, but
- it is a hazard if you declare a PL/Tcl> function
- as returning a domain type.
+ PL/Tcl functions can return sets. To do this, the Tcl code should
+ call return_next once per row to be returned,
+ passing either the appropriate value when returning a scalar type,
+ or a list of column name/value pairs when returning a composite type.
+ Here is an example returning a scalar type:
+
+
+CREATE FUNCTION sequence(int, int) RETURNS SETOF int AS $$
+ for {set i $1} {$i < $2} {incr i} {
+ return_next $i
+ }
+$$ LANGUAGE pltcl;
+
+
+ and here is one returning a composite type:
+
+
+CREATE FUNCTION table_of_squares(int, int) RETURNS TABLE (x int, x2 int) AS $$
+ for {set i $1} {$i < $2} {incr i} {
+ return_next [list x $i x2 [expr {$i * $i}]]
+ }
+$$ LANGUAGE pltcl;
+
@@ -195,10 +235,9 @@ $$ LANGUAGE pltcl;
The argument values supplied to a PL/Tcl function's code are simply
the input arguments converted to text form (just as if they had been
displayed by a SELECT> statement). Conversely, the
- return>
- command will accept any string that is acceptable input format for
- the function's declared return type. So, within the PL/Tcl function,
- all values are just text strings.
+ return> and return_next> commands will accept
+ any string that is acceptable input format for the function's declared
+ result type, or for the specified column of a composite result type.
diff --git a/src/pl/tcl/expected/pltcl_queries.out b/src/pl/tcl/expected/pltcl_queries.out
index 6cb1fdbb61..3a9fef3447 100644
--- a/src/pl/tcl/expected/pltcl_queries.out
+++ b/src/pl/tcl/expected/pltcl_queries.out
@@ -303,3 +303,64 @@ select tcl_lastoid('t2') > 0;
t
(1 row)
+-- test some error cases
+CREATE FUNCTION tcl_error(OUT a int, OUT b int) AS $$return {$$ LANGUAGE pltcl;
+SELECT tcl_error();
+ERROR: missing close-brace
+CREATE FUNCTION bad_record(OUT a text, OUT b text) AS $$return [list a]$$ LANGUAGE pltcl;
+SELECT bad_record();
+ERROR: column name/value list must have even number of elements
+CREATE FUNCTION bad_field(OUT a text, OUT b text) AS $$return [list a 1 b 2 cow 3]$$ LANGUAGE pltcl;
+SELECT bad_field();
+ERROR: column name/value list contains nonexistent column name "cow"
+-- test compound return
+select * from tcl_test_cube_squared(5);
+ squared | cubed
+---------+-------
+ 25 | 125
+(1 row)
+
+-- test SRF
+select * from tcl_test_squared_rows(0,5);
+ x | y
+---+----
+ 0 | 0
+ 1 | 1
+ 2 | 4
+ 3 | 9
+ 4 | 16
+(5 rows)
+
+select * from tcl_test_sequence(0,5) as a;
+ a
+---
+ 0
+ 1
+ 2
+ 3
+ 4
+(5 rows)
+
+select 1, tcl_test_sequence(0,5);
+ ?column? | tcl_test_sequence
+----------+-------------------
+ 1 | 0
+ 1 | 1
+ 1 | 2
+ 1 | 3
+ 1 | 4
+(5 rows)
+
+CREATE FUNCTION non_srf() RETURNS int AS $$return_next 1$$ LANGUAGE pltcl;
+select non_srf();
+ERROR: return_next cannot be used in non-set-returning functions
+CREATE FUNCTION bad_record_srf(OUT a text, OUT b text) RETURNS SETOF record AS $$
+return_next [list a]
+$$ LANGUAGE pltcl;
+SELECT bad_record_srf();
+ERROR: column name/value list must have even number of elements
+CREATE FUNCTION bad_field_srf(OUT a text, OUT b text) RETURNS SETOF record AS $$
+return_next [list a 1 b 2 cow 3]
+$$ LANGUAGE pltcl;
+SELECT bad_field_srf();
+ERROR: column name/value list contains nonexistent column name "cow"
diff --git a/src/pl/tcl/expected/pltcl_setup.out b/src/pl/tcl/expected/pltcl_setup.out
index e65e9e3ff7..ed99d9b492 100644
--- a/src/pl/tcl/expected/pltcl_setup.out
+++ b/src/pl/tcl/expected/pltcl_setup.out
@@ -555,6 +555,19 @@ NOTICE: tclsnitch: ddl_command_start DROP TABLE
NOTICE: tclsnitch: ddl_command_end DROP TABLE
drop event trigger tcl_a_snitch;
drop event trigger tcl_b_snitch;
+CREATE FUNCTION tcl_test_cube_squared(in int, out squared int, out cubed int) AS $$
+ return [list squared [expr {$1 * $1}] cubed [expr {$1 * $1 * $1}]]
+$$ language pltcl;
+CREATE FUNCTION tcl_test_squared_rows(int,int) RETURNS TABLE (x int, y int) AS $$
+ for {set i $1} {$i < $2} {incr i} {
+ return_next [list y [expr {$i * $i}] x $i]
+ }
+$$ language pltcl;
+CREATE FUNCTION tcl_test_sequence(int,int) RETURNS SETOF int AS $$
+ for {set i $1} {$i < $2} {incr i} {
+ return_next $i
+ }
+$$ language pltcl;
-- test use of errorCode in error handling
create function tcl_error_handling_test() returns text as $$
global errorCode
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 97d1f7ef7d..3d529c2e7d 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -21,6 +21,7 @@
#include "commands/trigger.h"
#include "executor/spi.h"
#include "fmgr.h"
+#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -123,6 +124,9 @@ typedef struct pltcl_interp_desc
* problem to manage its memory when we replace a proc definition. We do
* not clean up pltcl_proc_descs when a pg_proc row is deleted, only when
* it is updated, and the same policy applies to Tcl's copy as well.)
+ *
+ * Note that the data in this struct is shared across all active calls;
+ * nothing except the fn_refcount should be changed by a call instance.
**********************************************************************/
typedef struct pltcl_proc_desc
{
@@ -137,6 +141,8 @@ typedef struct pltcl_proc_desc
pltcl_interp_desc *interp_desc; /* interpreter to use */
FmgrInfo result_in_func; /* input function for fn's result type */
Oid result_typioparam; /* param to pass to same */
+ bool fn_retisset; /* true if function returns a set */
+ bool fn_retistuple; /* true if function returns composite */
int nargs; /* number of arguments */
/* these arrays have nargs entries: */
FmgrInfo *arg_out_func; /* output fns for arg types */
@@ -188,6 +194,32 @@ typedef struct pltcl_proc_ptr
} pltcl_proc_ptr;
+/**********************************************************************
+ * Per-call state
+ **********************************************************************/
+typedef struct pltcl_call_state
+{
+ /* Call info struct, or NULL in a trigger */
+ FunctionCallInfo fcinfo;
+
+ /* Function we're executing (NULL if not yet identified) */
+ pltcl_proc_desc *prodesc;
+
+ /*
+ * Information for SRFs and functions returning composite types.
+ * ret_tupdesc and attinmeta are set up if either fn_retistuple or
+ * fn_retisset, since even a scalar-returning SRF needs a tuplestore.
+ */
+ TupleDesc ret_tupdesc; /* return rowtype, if retistuple or retisset */
+ AttInMetadata *attinmeta; /* metadata for building tuples of that type */
+
+ ReturnSetInfo *rsi; /* passed-in ReturnSetInfo, if any */
+ Tuplestorestate *tuple_store; /* SRFs accumulate result here */
+ MemoryContext tuple_store_cxt; /* context and resowner for tuplestore */
+ ResourceOwner tuple_store_owner;
+} pltcl_call_state;
+
+
/**********************************************************************
* Global data
**********************************************************************/
@@ -196,9 +228,8 @@ static Tcl_Interp *pltcl_hold_interp = NULL;
static HTAB *pltcl_interp_htab = NULL;
static HTAB *pltcl_proc_htab = NULL;
-/* these are saved and restored by pltcl_handler */
-static FunctionCallInfo pltcl_current_fcinfo = NULL;
-static pltcl_proc_desc *pltcl_current_prodesc = NULL;
+/* this is saved and restored by pltcl_handler */
+static pltcl_call_state *pltcl_current_call_state = NULL;
/**********************************************************************
* Lookup table for SQLSTATE condition names
@@ -225,10 +256,12 @@ static void pltcl_init_load_unknown(Tcl_Interp *interp);
static Datum pltcl_handler(PG_FUNCTION_ARGS, bool pltrusted);
-static Datum pltcl_func_handler(PG_FUNCTION_ARGS, bool pltrusted);
-
-static HeapTuple pltcl_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted);
-static void pltcl_event_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted);
+static Datum pltcl_func_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
+ bool pltrusted);
+static HeapTuple pltcl_trigger_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
+ bool pltrusted);
+static void pltcl_event_trigger_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
+ bool pltrusted);
static void throw_tcl_error(Tcl_Interp *interp, const char *proname);
@@ -246,7 +279,8 @@ static int pltcl_argisnull(ClientData cdata, Tcl_Interp *interp,
int objc, Tcl_Obj *const objv[]);
static int pltcl_returnnull(ClientData cdata, Tcl_Interp *interp,
int objc, Tcl_Obj *const objv[]);
-
+static int pltcl_returnnext(ClientData cdata, Tcl_Interp *interp,
+ int objc, Tcl_Obj *const objv[]);
static int pltcl_SPI_execute(ClientData cdata, Tcl_Interp *interp,
int objc, Tcl_Obj *const objv[]);
static int pltcl_process_SPI_result(Tcl_Interp *interp,
@@ -265,6 +299,10 @@ static int pltcl_SPI_lastoid(ClientData cdata, Tcl_Interp *interp,
static void pltcl_set_tuple_values(Tcl_Interp *interp, const char *arrayname,
uint64 tupno, HeapTuple tuple, TupleDesc tupdesc);
static Tcl_Obj *pltcl_build_tuple_argument(HeapTuple tuple, TupleDesc tupdesc);
+static HeapTuple pltcl_build_tuple_result(Tcl_Interp *interp,
+ Tcl_Obj **kvObjv, int kvObjc,
+ pltcl_call_state *call_state);
+static void pltcl_init_tuple_store(pltcl_call_state *call_state);
/*
@@ -432,7 +470,8 @@ pltcl_init_interp(pltcl_interp_desc *interp_desc, bool pltrusted)
pltcl_argisnull, NULL, NULL);
Tcl_CreateObjCommand(interp, "return_null",
pltcl_returnnull, NULL, NULL);
-
+ Tcl_CreateObjCommand(interp, "return_next",
+ pltcl_returnnext, NULL, NULL);
Tcl_CreateObjCommand(interp, "spi_exec",
pltcl_SPI_execute, NULL, NULL);
Tcl_CreateObjCommand(interp, "spi_prepare",
@@ -625,29 +664,33 @@ pltclu_call_handler(PG_FUNCTION_ARGS)
}
+/**********************************************************************
+ * pltcl_handler() - Handler for function and trigger calls, for
+ * both trusted and untrusted interpreters.
+ **********************************************************************/
static Datum
pltcl_handler(PG_FUNCTION_ARGS, bool pltrusted)
{
Datum retval;
- FunctionCallInfo save_fcinfo;
- pltcl_proc_desc *save_prodesc;
- pltcl_proc_desc *this_prodesc;
+ pltcl_call_state current_call_state;
+ pltcl_call_state *save_call_state;
/*
- * Ensure that static pointers are saved/restored properly
+ * Initialize current_call_state to nulls/zeroes; in particular, set its
+ * prodesc pointer to null. Anything that sets it non-null should
+ * increase the prodesc's fn_refcount at the same time. We'll decrease
+ * the refcount, and then delete the prodesc if it's no longer referenced,
+ * on the way out of this function. This ensures that prodescs live as
+ * long as needed even if somebody replaces the originating pg_proc row
+ * while they're executing.
*/
- save_fcinfo = pltcl_current_fcinfo;
- save_prodesc = pltcl_current_prodesc;
+ memset(&current_call_state, 0, sizeof(current_call_state));
/*
- * Reset pltcl_current_prodesc to null. Anything that sets it non-null
- * should increase the prodesc's fn_refcount at the same time. We'll
- * decrease the refcount, and then delete the prodesc if it's no longer
- * referenced, on the way out of this function. This ensures that
- * prodescs live as long as needed even if somebody replaces the
- * originating pg_proc row while they're executing.
+ * Ensure that static pointer is saved/restored properly
*/
- pltcl_current_prodesc = NULL;
+ save_call_state = pltcl_current_call_state;
+ pltcl_current_call_state = &current_call_state;
PG_TRY();
{
@@ -657,47 +700,46 @@ pltcl_handler(PG_FUNCTION_ARGS, bool pltrusted)
*/
if (CALLED_AS_TRIGGER(fcinfo))
{
- pltcl_current_fcinfo = NULL;
- retval = PointerGetDatum(pltcl_trigger_handler(fcinfo, pltrusted));
+ /* invoke the trigger handler */
+ retval = PointerGetDatum(pltcl_trigger_handler(fcinfo,
+ &current_call_state,
+ pltrusted));
}
else if (CALLED_AS_EVENT_TRIGGER(fcinfo))
{
- pltcl_current_fcinfo = NULL;
- pltcl_event_trigger_handler(fcinfo, pltrusted);
+ /* invoke the event trigger handler */
+ pltcl_event_trigger_handler(fcinfo, &current_call_state, pltrusted);
retval = (Datum) 0;
}
else
{
- pltcl_current_fcinfo = fcinfo;
- retval = pltcl_func_handler(fcinfo, pltrusted);
+ /* invoke the regular function handler */
+ current_call_state.fcinfo = fcinfo;
+ retval = pltcl_func_handler(fcinfo, &current_call_state, pltrusted);
}
}
PG_CATCH();
{
- /* Restore globals, then clean up the prodesc refcount if any */
- this_prodesc = pltcl_current_prodesc;
- pltcl_current_fcinfo = save_fcinfo;
- pltcl_current_prodesc = save_prodesc;
- if (this_prodesc != NULL)
+ /* Restore static pointer, then clean up the prodesc refcount if any */
+ pltcl_current_call_state = save_call_state;
+ if (current_call_state.prodesc != NULL)
{
- Assert(this_prodesc->fn_refcount > 0);
- if (--this_prodesc->fn_refcount == 0)
- MemoryContextDelete(this_prodesc->fn_cxt);
+ Assert(current_call_state.prodesc->fn_refcount > 0);
+ if (--current_call_state.prodesc->fn_refcount == 0)
+ MemoryContextDelete(current_call_state.prodesc->fn_cxt);
}
PG_RE_THROW();
}
PG_END_TRY();
- /* Restore globals, then clean up the prodesc refcount if any */
+ /* Restore static pointer, then clean up the prodesc refcount if any */
/* (We're being paranoid in case an error is thrown in context deletion) */
- this_prodesc = pltcl_current_prodesc;
- pltcl_current_fcinfo = save_fcinfo;
- pltcl_current_prodesc = save_prodesc;
- if (this_prodesc != NULL)
+ pltcl_current_call_state = save_call_state;
+ if (current_call_state.prodesc != NULL)
{
- Assert(this_prodesc->fn_refcount > 0);
- if (--this_prodesc->fn_refcount == 0)
- MemoryContextDelete(this_prodesc->fn_cxt);
+ Assert(current_call_state.prodesc->fn_refcount > 0);
+ if (--current_call_state.prodesc->fn_refcount == 0)
+ MemoryContextDelete(current_call_state.prodesc->fn_cxt);
}
return retval;
@@ -708,7 +750,8 @@ pltcl_handler(PG_FUNCTION_ARGS, bool pltrusted)
* pltcl_func_handler() - Handler for regular function calls
**********************************************************************/
static Datum
-pltcl_func_handler(PG_FUNCTION_ARGS, bool pltrusted)
+pltcl_func_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
+ bool pltrusted)
{
pltcl_proc_desc *prodesc;
Tcl_Interp *volatile interp;
@@ -725,11 +768,32 @@ pltcl_func_handler(PG_FUNCTION_ARGS, bool pltrusted)
prodesc = compile_pltcl_function(fcinfo->flinfo->fn_oid, InvalidOid,
false, pltrusted);
- pltcl_current_prodesc = prodesc;
+ call_state->prodesc = prodesc;
prodesc->fn_refcount++;
interp = prodesc->interp_desc->interp;
+ /*
+ * If we're a SRF, check caller can handle materialize mode, and save
+ * relevant info into call_state. We must ensure that the returned
+ * tuplestore is owned by the caller's context, even if we first create it
+ * inside a subtransaction.
+ */
+ if (prodesc->fn_retisset)
+ {
+ ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ if (!rsi || !IsA(rsi, ReturnSetInfo) ||
+ (rsi->allowedModes & SFRM_Materialize) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+
+ call_state->rsi = rsi;
+ call_state->tuple_store_cxt = rsi->econtext->ecxt_per_query_memory;
+ call_state->tuple_store_owner = CurrentResourceOwner;
+ }
+
/************************************************************
* Create the tcl command to call the internal
* proc in the Tcl interpreter
@@ -838,11 +902,72 @@ pltcl_func_handler(PG_FUNCTION_ARGS, bool pltrusted)
if (SPI_finish() != SPI_OK_FINISH)
elog(ERROR, "SPI_finish() failed");
- if (fcinfo->isnull)
+ if (prodesc->fn_retisset)
+ {
+ ReturnSetInfo *rsi = call_state->rsi;
+
+ /* We already checked this is OK */
+ rsi->returnMode = SFRM_Materialize;
+
+ /* If we produced any tuples, send back the result */
+ if (call_state->tuple_store)
+ {
+ rsi->setResult = call_state->tuple_store;
+ if (call_state->ret_tupdesc)
+ {
+ MemoryContext oldcxt;
+
+ oldcxt = MemoryContextSwitchTo(call_state->tuple_store_cxt);
+ rsi->setDesc = CreateTupleDescCopy(call_state->ret_tupdesc);
+ MemoryContextSwitchTo(oldcxt);
+ }
+ }
+ retval = (Datum) 0;
+ fcinfo->isnull = true;
+ }
+ else if (fcinfo->isnull)
+ {
retval = InputFunctionCall(&prodesc->result_in_func,
NULL,
prodesc->result_typioparam,
-1);
+ }
+ else if (prodesc->fn_retistuple)
+ {
+ TupleDesc td;
+ HeapTuple tup;
+ Tcl_Obj *resultObj;
+ Tcl_Obj **resultObjv;
+ int resultObjc;
+
+ /*
+ * Set up data about result type. XXX it's tempting to consider
+ * caching this in the prodesc, in the common case where the rowtype
+ * is determined by the function not the calling query. But we'd have
+ * to be able to deal with ADD/DROP/ALTER COLUMN events when the
+ * result type is a named composite type, so it's not exactly trivial.
+ * Maybe worth improving someday.
+ */
+ if (get_call_result_type(fcinfo, NULL, &td) != TYPEFUNC_COMPOSITE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("function returning record called in context "
+ "that cannot accept type record")));
+
+ Assert(!call_state->ret_tupdesc);
+ Assert(!call_state->attinmeta);
+ call_state->ret_tupdesc = td;
+ call_state->attinmeta = TupleDescGetAttInMetadata(td);
+
+ /* Convert function result to tuple */
+ resultObj = Tcl_GetObjResult(interp);
+ if (Tcl_ListObjGetElements(interp, resultObj, &resultObjc, &resultObjv) == TCL_ERROR)
+ throw_tcl_error(interp, prodesc->user_proname);
+
+ tup = pltcl_build_tuple_result(interp, resultObjv, resultObjc,
+ call_state);
+ retval = HeapTupleGetDatum(tup);
+ }
else
retval = InputFunctionCall(&prodesc->result_in_func,
utf_u2e(Tcl_GetStringResult(interp)),
@@ -857,7 +982,8 @@ pltcl_func_handler(PG_FUNCTION_ARGS, bool pltrusted)
* pltcl_trigger_handler() - Handler for trigger calls
**********************************************************************/
static HeapTuple
-pltcl_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted)
+pltcl_trigger_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
+ bool pltrusted)
{
pltcl_proc_desc *prodesc;
Tcl_Interp *volatile interp;
@@ -886,7 +1012,7 @@ pltcl_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted)
false, /* not an event trigger */
pltrusted);
- pltcl_current_prodesc = prodesc;
+ call_state->prodesc = prodesc;
prodesc->fn_refcount++;
interp = prodesc->interp_desc->interp;
@@ -1169,7 +1295,8 @@ pltcl_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted)
* pltcl_event_trigger_handler() - Handler for event trigger calls
**********************************************************************/
static void
-pltcl_event_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted)
+pltcl_event_trigger_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
+ bool pltrusted)
{
pltcl_proc_desc *prodesc;
Tcl_Interp *volatile interp;
@@ -1185,7 +1312,7 @@ pltcl_event_trigger_handler(PG_FUNCTION_ARGS, bool pltrusted)
prodesc = compile_pltcl_function(fcinfo->flinfo->fn_oid,
InvalidOid, true, pltrusted);
- pltcl_current_prodesc = prodesc;
+ call_state->prodesc = prodesc;
prodesc->fn_refcount++;
interp = prodesc->interp_desc->interp;
@@ -1389,10 +1516,11 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid,
procStruct->prorettype);
typeStruct = (Form_pg_type) GETSTRUCT(typeTup);
- /* Disallow pseudotype result, except VOID */
+ /* Disallow pseudotype result, except VOID and RECORD */
if (typeStruct->typtype == TYPTYPE_PSEUDO)
{
- if (procStruct->prorettype == VOIDOID)
+ if (procStruct->prorettype == VOIDOID ||
+ procStruct->prorettype == RECORDOID)
/* okay */ ;
else if (procStruct->prorettype == TRIGGEROID ||
procStruct->prorettype == EVTTRIGGEROID)
@@ -1406,16 +1534,15 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid,
format_type_be(procStruct->prorettype))));
}
- if (typeStruct->typtype == TYPTYPE_COMPOSITE)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("PL/Tcl functions cannot return composite types")));
-
fmgr_info_cxt(typeStruct->typinput,
&(prodesc->result_in_func),
proc_cxt);
prodesc->result_typioparam = getTypeIOParam(typeTup);
+ prodesc->fn_retisset = procStruct->proretset;
+ prodesc->fn_retistuple = (procStruct->prorettype == RECORDOID ||
+ typeStruct->typtype == TYPTYPE_COMPOSITE);
+
ReleaseSysCache(typeTup);
}
@@ -1914,7 +2041,7 @@ pltcl_argisnull(ClientData cdata, Tcl_Interp *interp,
int objc, Tcl_Obj *const objv[])
{
int argno;
- FunctionCallInfo fcinfo = pltcl_current_fcinfo;
+ FunctionCallInfo fcinfo = pltcl_current_call_state->fcinfo;
/************************************************************
* Check call syntax
@@ -1967,7 +2094,7 @@ static int
pltcl_returnnull(ClientData cdata, Tcl_Interp *interp,
int objc, Tcl_Obj *const objv[])
{
- FunctionCallInfo fcinfo = pltcl_current_fcinfo;
+ FunctionCallInfo fcinfo = pltcl_current_call_state->fcinfo;
/************************************************************
* Check call syntax
@@ -1998,6 +2125,95 @@ pltcl_returnnull(ClientData cdata, Tcl_Interp *interp,
}
+/**********************************************************************
+ * pltcl_returnnext() - Add a row to the result tuplestore in a SRF.
+ **********************************************************************/
+static int
+pltcl_returnnext(ClientData cdata, Tcl_Interp *interp,
+ int objc, Tcl_Obj *const objv[])
+{
+ pltcl_call_state *call_state = pltcl_current_call_state;
+ FunctionCallInfo fcinfo = call_state->fcinfo;
+ pltcl_proc_desc *prodesc = call_state->prodesc;
+ int result = TCL_OK;
+ MemoryContext tmpcxt;
+ MemoryContext oldcxt;
+
+ /*
+ * Check that we're called as a set-returning function
+ */
+ if (fcinfo == NULL)
+ {
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj("return_next cannot be used in triggers", -1));
+ return TCL_ERROR;
+ }
+
+ if (!prodesc->fn_retisset)
+ {
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj("return_next cannot be used in non-set-returning functions", -1));
+ return TCL_ERROR;
+ }
+
+ /*
+ * Check call syntax
+ */
+ if (objc != 2)
+ {
+ Tcl_WrongNumArgs(interp, 1, objv, "result");
+ return TCL_ERROR;
+ }
+
+ /* Set up tuple store if first output row */
+ if (call_state->tuple_store == NULL)
+ pltcl_init_tuple_store(call_state);
+
+ /* Make short-lived context to run input functions in */
+ tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
+ "pltcl_returnnext",
+ ALLOCSET_SMALL_SIZES);
+ oldcxt = MemoryContextSwitchTo(tmpcxt);
+
+ if (prodesc->fn_retistuple)
+ {
+ Tcl_Obj **rowObjv;
+ int rowObjc;
+
+ /* result should be a list, so break it down */
+ if (Tcl_ListObjGetElements(interp, objv[1], &rowObjc, &rowObjv) == TCL_ERROR)
+ result = TCL_ERROR;
+ else
+ {
+ HeapTuple tuple;
+
+ SPI_push();
+ tuple = pltcl_build_tuple_result(interp, rowObjv, rowObjc,
+ call_state);
+ tuplestore_puttuple(call_state->tuple_store, tuple);
+ SPI_pop();
+ }
+ }
+ else
+ {
+ Datum retval;
+ bool isNull = false;
+
+ retval = InputFunctionCall(&prodesc->result_in_func,
+ utf_u2e((char *) Tcl_GetString(objv[1])),
+ prodesc->result_typioparam,
+ -1);
+ tuplestore_putvalues(call_state->tuple_store, call_state->ret_tupdesc,
+ &retval, &isNull);
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(tmpcxt);
+
+ return result;
+}
+
+
/*----------
* Support for running SPI operations inside subtransactions
*
@@ -2164,7 +2380,7 @@ pltcl_SPI_execute(ClientData cdata, Tcl_Interp *interp,
{
UTF_BEGIN;
spi_rc = SPI_execute(UTF_U2E(Tcl_GetString(objv[query_idx])),
- pltcl_current_prodesc->fn_readonly, count);
+ pltcl_current_call_state->prodesc->fn_readonly, count);
UTF_END;
my_rc = pltcl_process_SPI_result(interp,
@@ -2414,7 +2630,7 @@ pltcl_SPI_prepare(ClientData cdata, Tcl_Interp *interp,
* Insert a hashtable entry for the plan and return
* the key to the caller
************************************************************/
- query_hash = &pltcl_current_prodesc->interp_desc->query_hash;
+ query_hash = &pltcl_current_call_state->prodesc->interp_desc->query_hash;
hashent = Tcl_CreateHashEntry(query_hash, qdesc->qname, &hashnew);
Tcl_SetHashValue(hashent, (ClientData) qdesc);
@@ -2503,7 +2719,7 @@ pltcl_SPI_execute_plan(ClientData cdata, Tcl_Interp *interp,
return TCL_ERROR;
}
- query_hash = &pltcl_current_prodesc->interp_desc->query_hash;
+ query_hash = &pltcl_current_call_state->prodesc->interp_desc->query_hash;
hashent = Tcl_FindHashEntry(query_hash, Tcl_GetString(objv[i]));
if (hashent == NULL)
@@ -2618,7 +2834,8 @@ pltcl_SPI_execute_plan(ClientData cdata, Tcl_Interp *interp,
* Execute the plan
************************************************************/
spi_rc = SPI_execute_plan(qdesc->plan, argvalues, nulls,
- pltcl_current_prodesc->fn_readonly, count);
+ pltcl_current_call_state->prodesc->fn_readonly,
+ count);
my_rc = pltcl_process_SPI_result(interp,
arrayname,
@@ -2808,3 +3025,88 @@ pltcl_build_tuple_argument(HeapTuple tuple, TupleDesc tupdesc)
return retobj;
}
+
+/**********************************************************************
+ * pltcl_build_tuple_result() - Build a tuple of function's result rowtype
+ * from a Tcl list of column names and values
+ *
+ * Note: this function leaks memory. Even if we made it clean up its own
+ * mess, there's no way to prevent the datatype input functions it calls
+ * from leaking. Run it in a short-lived context, unless we're about to
+ * exit the procedure anyway.
+ *
+ * Also, caller is responsible for doing SPI_push/SPI_pop if calling from
+ * inside SPI environment.
+ **********************************************************************/
+static HeapTuple
+pltcl_build_tuple_result(Tcl_Interp *interp, Tcl_Obj **kvObjv, int kvObjc,
+ pltcl_call_state *call_state)
+{
+ char **values;
+ int i;
+
+ if (kvObjc % 2 != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("column name/value list must have even number of elements")));
+
+ values = (char **) palloc0(call_state->ret_tupdesc->natts * sizeof(char *));
+
+ for (i = 0; i < kvObjc; i += 2)
+ {
+ char *fieldName = utf_e2u(Tcl_GetString(kvObjv[i]));
+ int attn = SPI_fnumber(call_state->ret_tupdesc, fieldName);
+
+ if (attn <= 0 || call_state->ret_tupdesc->attrs[attn - 1]->attisdropped)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column name/value list contains nonexistent column name \"%s\"",
+ fieldName)));
+
+ values[attn - 1] = utf_e2u(Tcl_GetString(kvObjv[i + 1]));
+ }
+
+ return BuildTupleFromCStrings(call_state->attinmeta, values);
+}
+
+/**********************************************************************
+ * pltcl_init_tuple_store() - Initialize the result tuplestore for a SRF
+ **********************************************************************/
+static void
+pltcl_init_tuple_store(pltcl_call_state *call_state)
+{
+ ReturnSetInfo *rsi = call_state->rsi;
+ MemoryContext oldcxt;
+ ResourceOwner oldowner;
+
+ /* Should be in a SRF */
+ Assert(rsi);
+ /* Should be first time through */
+ Assert(!call_state->tuple_store);
+ Assert(!call_state->attinmeta);
+
+ /* We expect caller to provide an appropriate result tupdesc */
+ Assert(rsi->expectedDesc);
+ call_state->ret_tupdesc = rsi->expectedDesc;
+
+ /*
+ * Switch to the right memory context and resource owner for storing the
+ * tuplestore. If we're within a subtransaction opened for an exception
+ * block, for example, we must still create the tuplestore in the resource
+ * owner that was active when this function was entered, and not in the
+ * subtransaction's resource owner.
+ */
+ oldcxt = MemoryContextSwitchTo(call_state->tuple_store_cxt);
+ oldowner = CurrentResourceOwner;
+ CurrentResourceOwner = call_state->tuple_store_owner;
+
+ call_state->tuple_store =
+ tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+ false, work_mem);
+
+ /* Build attinmeta in this context, too */
+ call_state->attinmeta = TupleDescGetAttInMetadata(call_state->ret_tupdesc);
+
+ CurrentResourceOwner = oldowner;
+ MemoryContextSwitchTo(oldcxt);
+}
diff --git a/src/pl/tcl/sql/pltcl_queries.sql b/src/pl/tcl/sql/pltcl_queries.sql
index a0a9619a9b..0ebfe65340 100644
--- a/src/pl/tcl/sql/pltcl_queries.sql
+++ b/src/pl/tcl/sql/pltcl_queries.sql
@@ -97,3 +97,36 @@ create temp table t1 (f1 int);
select tcl_lastoid('t1');
create temp table t2 (f1 int) with oids;
select tcl_lastoid('t2') > 0;
+
+-- test some error cases
+CREATE FUNCTION tcl_error(OUT a int, OUT b int) AS $$return {$$ LANGUAGE pltcl;
+SELECT tcl_error();
+
+CREATE FUNCTION bad_record(OUT a text, OUT b text) AS $$return [list a]$$ LANGUAGE pltcl;
+SELECT bad_record();
+
+CREATE FUNCTION bad_field(OUT a text, OUT b text) AS $$return [list a 1 b 2 cow 3]$$ LANGUAGE pltcl;
+SELECT bad_field();
+
+-- test compound return
+select * from tcl_test_cube_squared(5);
+
+-- test SRF
+select * from tcl_test_squared_rows(0,5);
+
+select * from tcl_test_sequence(0,5) as a;
+
+select 1, tcl_test_sequence(0,5);
+
+CREATE FUNCTION non_srf() RETURNS int AS $$return_next 1$$ LANGUAGE pltcl;
+select non_srf();
+
+CREATE FUNCTION bad_record_srf(OUT a text, OUT b text) RETURNS SETOF record AS $$
+return_next [list a]
+$$ LANGUAGE pltcl;
+SELECT bad_record_srf();
+
+CREATE FUNCTION bad_field_srf(OUT a text, OUT b text) RETURNS SETOF record AS $$
+return_next [list a 1 b 2 cow 3]
+$$ LANGUAGE pltcl;
+SELECT bad_field_srf();
diff --git a/src/pl/tcl/sql/pltcl_setup.sql b/src/pl/tcl/sql/pltcl_setup.sql
index 8df65a5816..58f38d53aa 100644
--- a/src/pl/tcl/sql/pltcl_setup.sql
+++ b/src/pl/tcl/sql/pltcl_setup.sql
@@ -596,6 +596,22 @@ drop table foo;
drop event trigger tcl_a_snitch;
drop event trigger tcl_b_snitch;
+CREATE FUNCTION tcl_test_cube_squared(in int, out squared int, out cubed int) AS $$
+ return [list squared [expr {$1 * $1}] cubed [expr {$1 * $1 * $1}]]
+$$ language pltcl;
+
+CREATE FUNCTION tcl_test_squared_rows(int,int) RETURNS TABLE (x int, y int) AS $$
+ for {set i $1} {$i < $2} {incr i} {
+ return_next [list y [expr {$i * $i}] x $i]
+ }
+$$ language pltcl;
+
+CREATE FUNCTION tcl_test_sequence(int,int) RETURNS SETOF int AS $$
+ for {set i $1} {$i < $2} {incr i} {
+ return_next $i
+ }
+$$ language pltcl;
+
-- test use of errorCode in error handling
create function tcl_error_handling_test() returns text as $$
--
cgit v1.2.3
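As background for the pltcl patch above: the handler now follows PostgreSQL's standard materialize-mode SRF protocol (check SFRM_Materialize, build a tuplestore in the per-query context, hand it back via ReturnSetInfo). A minimal C sketch of that protocol, assuming a hypothetical function three_ints returning SETOF int; this is an illustration of the convention, not code from the patch:

#include "postgres.h"

#include "funcapi.h"
#include "miscadmin.h"
#include "utils/tuplestore.h"

PG_FUNCTION_INFO_V1(three_ints);

Datum
three_ints(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
	Tuplestorestate *tupstore;
	TupleDesc	tupdesc;
	MemoryContext oldcxt;
	int			i;

	/* Same caller check the new pltcl_func_handler code performs */
	if (!rsi || !IsA(rsi, ReturnSetInfo) ||
		(rsi->allowedModes & SFRM_Materialize) == 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));

	/* Tuplestore and tupdesc must live in the per-query memory context */
	oldcxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
	tupdesc = CreateTupleDescCopy(rsi->expectedDesc);
	tupstore = tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
									 false, work_mem);
	MemoryContextSwitchTo(oldcxt);

	/* Emit one row per return_next-equivalent call */
	for (i = 1; i <= 3; i++)
	{
		Datum		value = Int32GetDatum(i);
		bool		isnull = false;

		tuplestore_putvalues(tupstore, tupdesc, &value, &isnull);
	}

	/* Hand the result back; a materialize-mode SRF returns a null Datum */
	rsi->returnMode = SFRM_Materialize;
	rsi->setResult = tupstore;
	rsi->setDesc = tupdesc;

	PG_RETURN_NULL();
}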
From 33cb96ba1a84c612491fb5794674a649d1a6a4d6 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 7 Nov 2016 10:19:22 -0500
Subject: Revert "Provide DLLEXPORT markers for C functions via
PG_FUNCTION_INFO_V1 macro."
This reverts commit c8ead2a3974d3eada145a0e18940150039493cc9.
Seems there is no way to do this that doesn't cause MSVC to give
warnings, so let's just go back to the way we've been doing it.
Discussion: <11843.1478358206@sss.pgh.pa.us>
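For illustration, a minimal V1 function as a module author would write it after this revert; add_one is a hypothetical name. In normal builds the symbol is exported via .DEF files or --export-all-symbols, so no explicit PGDLLEXPORT is needed on the function itself:

#include "postgres.h"
#include "fmgr.h"

PG_MODULE_MAGIC;

/*
 * With the reverted macro, PG_FUNCTION_INFO_V1 declares add_one without
 * PGDLLEXPORT; only the generated pg_finfo_add_one info function keeps
 * the marker, since authors shouldn't need to be aware of it.
 */
PG_FUNCTION_INFO_V1(add_one);

Datum
add_one(PG_FUNCTION_ARGS)
{
	PG_RETURN_INT32(PG_GETARG_INT32(0) + 1);
}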
---
contrib/hstore/hstore.h | 2 +-
contrib/ltree/ltree.h | 40 ++++++++++++++++++++--------------------
doc/src/sgml/xfunc.sgml | 17 -----------------
src/include/fmgr.h | 7 ++++---
4 files changed, 25 insertions(+), 41 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/hstore/hstore.h b/contrib/hstore/hstore.h
index 6303fa4061..6bab08b7de 100644
--- a/contrib/hstore/hstore.h
+++ b/contrib/hstore/hstore.h
@@ -194,7 +194,7 @@ extern Pairs *hstoreArrayToPairs(ArrayType *a, int *npairs);
#if HSTORE_POLLUTE_NAMESPACE
#define HSTORE_POLLUTE(newname_,oldname_) \
PG_FUNCTION_INFO_V1(oldname_); \
- extern PGDLLEXPORT Datum newname_(PG_FUNCTION_ARGS); \
+ Datum newname_(PG_FUNCTION_ARGS); \
Datum oldname_(PG_FUNCTION_ARGS) { return newname_(fcinfo); } \
extern int no_such_variable
#else
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index c7aa7f8818..c604357dbf 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -130,30 +130,30 @@ typedef struct
/* use in array iterator */
-extern PGDLLEXPORT Datum ltree_isparent(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltree_risparent(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltq_regex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltq_rregex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum lt_q_regex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum lt_q_rregex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltxtq_exec(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltxtq_rexec(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _ltq_regex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _ltq_rregex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _lt_q_regex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _lt_q_rregex(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _ltxtq_exec(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _ltxtq_rexec(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _ltree_isparent(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum _ltree_risparent(PG_FUNCTION_ARGS);
+Datum ltree_isparent(PG_FUNCTION_ARGS);
+Datum ltree_risparent(PG_FUNCTION_ARGS);
+Datum ltq_regex(PG_FUNCTION_ARGS);
+Datum ltq_rregex(PG_FUNCTION_ARGS);
+Datum lt_q_regex(PG_FUNCTION_ARGS);
+Datum lt_q_rregex(PG_FUNCTION_ARGS);
+Datum ltxtq_exec(PG_FUNCTION_ARGS);
+Datum ltxtq_rexec(PG_FUNCTION_ARGS);
+Datum _ltq_regex(PG_FUNCTION_ARGS);
+Datum _ltq_rregex(PG_FUNCTION_ARGS);
+Datum _lt_q_regex(PG_FUNCTION_ARGS);
+Datum _lt_q_rregex(PG_FUNCTION_ARGS);
+Datum _ltxtq_exec(PG_FUNCTION_ARGS);
+Datum _ltxtq_rexec(PG_FUNCTION_ARGS);
+Datum _ltree_isparent(PG_FUNCTION_ARGS);
+Datum _ltree_risparent(PG_FUNCTION_ARGS);
/* Concatenation functions */
-extern PGDLLEXPORT Datum ltree_addltree(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltree_addtext(PG_FUNCTION_ARGS);
-extern PGDLLEXPORT Datum ltree_textadd(PG_FUNCTION_ARGS);
+Datum ltree_addltree(PG_FUNCTION_ARGS);
+Datum ltree_addtext(PG_FUNCTION_ARGS);
+Datum ltree_textadd(PG_FUNCTION_ARGS);
/* Util function */
-extern PGDLLEXPORT Datum ltree_in(PG_FUNCTION_ARGS);
+Datum ltree_in(PG_FUNCTION_ARGS);
bool ltree_execute(ITEM *curitem, void *checkval,
bool calcnot, bool (*chkcond) (void *checkval, ITEM *val));
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 6060e61857..de6a466efc 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -2577,23 +2577,6 @@ concat_text(PG_FUNCTION_ARGS)
error messages to this effect.
-
-
-
- To work correctly on Windows, C>-language functions need
- to be marked with PGDLLEXPORT>, unless you use a build
- process that marks all global functions that way. In simple cases
- this detail will be handled transparently by
- the PG_FUNCTION_INFO_V1> macro. However, if you write
- explicit external declarations (perhaps in header files), be sure
- to write them like this:
-
-extern PGDLLEXPORT Datum funcname(PG_FUNCTION_ARGS);
-
- or you'll get compiler complaints when building on Windows. (On
- other platforms, the PGDLLEXPORT> macro does nothing.)
-
-
diff --git a/src/include/fmgr.h b/src/include/fmgr.h
index 3668ac3f6e..0878418516 100644
--- a/src/include/fmgr.h
+++ b/src/include/fmgr.h
@@ -350,11 +350,12 @@ typedef const Pg_finfo_record *(*PGFInfoFunction) (void);
*
* On Windows, the function and info function must be exported. Our normal
* build processes take care of that via .DEF files or --export-all-symbols.
- * Module authors using a different build process might do it differently,
- * so we declare these functions PGDLLEXPORT for their convenience.
+ * Module authors using a different build process might need to manually
+ * declare the function PGDLLEXPORT. We do that automatically here for the
+ * info function, since authors shouldn't need to be explicitly aware of it.
*/
#define PG_FUNCTION_INFO_V1(funcname) \
-extern PGDLLEXPORT Datum funcname(PG_FUNCTION_ARGS); \
+extern Datum funcname(PG_FUNCTION_ARGS); \
extern PGDLLEXPORT const Pg_finfo_record * CppConcat(pg_finfo_,funcname)(void); \
const Pg_finfo_record * \
CppConcat(pg_finfo_,funcname) (void) \
--
cgit v1.2.3
From 36ac6d0e793087153a452df6502d0ef32a780db6 Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Tue, 8 Nov 2016 18:34:59 +0100
Subject: Fix typo
---
doc/src/sgml/ref/pg_basebackup.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index e66a7ae8ee..1f15a17d0e 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -88,7 +88,7 @@ PostgreSQL documentation
There is no guarantee that all WAL files required for the backup are archived
at the end of backup. If you are planning to use the backup for an archive
recovery and want to ensure that all required files are available at that moment,
- you need to include them into the backup by using -x> option.
+ you need to include them into the backup by using the -x> option.
--
cgit v1.2.3
From 6d30fb1f75a57d80f80e27770d39d88f8aa32d28 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 8 Nov 2016 13:11:15 -0500
Subject: Make SPI_fnumber() reject dropped columns.
There's basically no scenario where it's sensible for this to match
dropped columns, so put a test for dropped-ness into SPI_fnumber()
itself, and excise the test from the small number of callers that
were paying attention to the case. (Most weren't :-(.)
In passing, normalize tests at call sites: always reject attnum <= 0
if we're disallowing system columns. Previously there was a mixture
of "< 0" and "<= 0" tests. This makes no practical difference since
SPI_fnumber() never returns 0, but I'm feeling pedantic today.
Also, in the places that are actually live user-facing code and not
legacy cruft, distinguish "column not found" from "can't handle
system column".
Per discussion with Jim Nasby; this supersedes his original patch
that just changed the behavior at one call site.
Discussion:
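A sketch of the normalized call-site pattern this commit establishes, with tupdesc and colname assumed to be in scope (it mirrors the plperl hunk below):

int		attn = SPI_fnumber(tupdesc, colname);

if (attn == SPI_ERROR_NOATTRIBUTE)
	ereport(ERROR,
			(errcode(ERRCODE_UNDEFINED_COLUMN),
			 errmsg("column \"%s\" does not exist", colname)));
if (attn <= 0)					/* negative attnum means a system column */
	ereport(ERROR,
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			 errmsg("cannot set system attribute \"%s\"", colname)));

/*
 * No attisdropped test is needed here anymore: SPI_fnumber itself now
 * skips dropped columns.
 */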
---
contrib/spi/autoinc.c | 2 +-
contrib/spi/insert_username.c | 2 +-
contrib/spi/moddatetime.c | 4 ++--
contrib/spi/refint.c | 5 +++--
contrib/spi/timetravel.c | 4 ++--
doc/src/sgml/spi.sgml | 2 +-
src/backend/executor/spi.c | 3 ++-
src/backend/utils/adt/tsvector_op.c | 1 +
src/pl/plperl/plperl.c | 7 ++++++-
src/pl/tcl/pltcl.c | 11 +----------
src/test/regress/regress.c | 9 +++++----
11 files changed, 25 insertions(+), 25 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/spi/autoinc.c b/contrib/spi/autoinc.c
index 41eae4fdc4..fc657a7c06 100644
--- a/contrib/spi/autoinc.c
+++ b/contrib/spi/autoinc.c
@@ -71,7 +71,7 @@ autoinc(PG_FUNCTION_ARGS)
int32 val;
Datum seqname;
- if (attnum < 0)
+ if (attnum <= 0)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION),
errmsg("\"%s\" has no attribute \"%s\"",
diff --git a/contrib/spi/insert_username.c b/contrib/spi/insert_username.c
index 3812525c4c..617c60a81c 100644
--- a/contrib/spi/insert_username.c
+++ b/contrib/spi/insert_username.c
@@ -67,7 +67,7 @@ insert_username(PG_FUNCTION_ARGS)
attnum = SPI_fnumber(tupdesc, args[0]);
- if (attnum < 0)
+ if (attnum <= 0)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION),
errmsg("\"%s\" has no attribute \"%s\"", relname, args[0])));
diff --git a/contrib/spi/moddatetime.c b/contrib/spi/moddatetime.c
index c6d33b7355..cd700fe6d1 100644
--- a/contrib/spi/moddatetime.c
+++ b/contrib/spi/moddatetime.c
@@ -84,9 +84,9 @@ moddatetime(PG_FUNCTION_ARGS)
/*
* This is where we check to see if the field we are supposed to update
- * even exists. The above function must return -1 if name not found?
+ * even exists.
*/
- if (attnum < 0)
+ if (attnum <= 0)
ereport(ERROR,
(errcode(ERRCODE_TRIGGERED_ACTION_EXCEPTION),
errmsg("\"%s\" has no attribute \"%s\"",
diff --git a/contrib/spi/refint.c b/contrib/spi/refint.c
index 01dd717522..78cfedf219 100644
--- a/contrib/spi/refint.c
+++ b/contrib/spi/refint.c
@@ -135,7 +135,7 @@ check_primary_key(PG_FUNCTION_ARGS)
int fnumber = SPI_fnumber(tupdesc, args[i]);
/* Bad guys may give us un-existing column in CREATE TRIGGER */
- if (fnumber < 0)
+ if (fnumber <= 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("there is no attribute \"%s\" in relation \"%s\"",
@@ -362,7 +362,7 @@ check_foreign_key(PG_FUNCTION_ARGS)
int fnumber = SPI_fnumber(tupdesc, args[i]);
/* Bad guys may give us un-existing column in CREATE TRIGGER */
- if (fnumber < 0)
+ if (fnumber <= 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("there is no attribute \"%s\" in relation \"%s\"",
@@ -469,6 +469,7 @@ check_foreign_key(PG_FUNCTION_ARGS)
char *type;
fn = SPI_fnumber(tupdesc, args_temp[k - 1]);
+ Assert(fn > 0); /* already checked above */
nv = SPI_getvalue(newtuple, tupdesc, fn);
type = SPI_gettype(tupdesc, fn);
diff --git a/contrib/spi/timetravel.c b/contrib/spi/timetravel.c
index 5a345841c6..30dcfd4d3e 100644
--- a/contrib/spi/timetravel.c
+++ b/contrib/spi/timetravel.c
@@ -157,7 +157,7 @@ timetravel(PG_FUNCTION_ARGS)
for (i = 0; i < MinAttrNum; i++)
{
attnum[i] = SPI_fnumber(tupdesc, args[i]);
- if (attnum[i] < 0)
+ if (attnum[i] <= 0)
elog(ERROR, "timetravel (%s): there is no attribute %s", relname, args[i]);
if (SPI_gettypeid(tupdesc, attnum[i]) != ABSTIMEOID)
elog(ERROR, "timetravel (%s): attribute %s must be of abstime type",
@@ -166,7 +166,7 @@ timetravel(PG_FUNCTION_ARGS)
for (; i < argc; i++)
{
attnum[i] = SPI_fnumber(tupdesc, args[i]);
- if (attnum[i] < 0)
+ if (attnum[i] <= 0)
elog(ERROR, "timetravel (%s): there is no attribute %s", relname, args[i]);
if (SPI_gettypeid(tupdesc, attnum[i]) != TEXTOID)
elog(ERROR, "timetravel (%s): attribute %s must be of text type",
diff --git a/doc/src/sgml/spi.sgml b/doc/src/sgml/spi.sgml
index 9ae7126ae7..817a5d0120 100644
--- a/doc/src/sgml/spi.sgml
+++ b/doc/src/sgml/spi.sgml
@@ -2891,7 +2891,7 @@ int SPI_fnumber(TupleDesc rowdesc, const char * Return Value
- Column number (count starts at 1), or
+ Column number (count starts at 1 for user-defined columns), or
SPI_ERROR_NOATTRIBUTE if the named column was not
found.
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 38767ae4ce..8e650bc412 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -824,7 +824,8 @@ SPI_fnumber(TupleDesc tupdesc, const char *fname)
for (res = 0; res < tupdesc->natts; res++)
{
- if (namestrcmp(&tupdesc->attrs[res]->attname, fname) == 0)
+ if (namestrcmp(&tupdesc->attrs[res]->attname, fname) == 0 &&
+ !tupdesc->attrs[res]->attisdropped)
return res + 1;
}
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index ad5a254c57..0e9ae5ff9c 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -2242,6 +2242,7 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("tsvector column \"%s\" does not exist",
trigger->tgargs[0])));
+ /* This will effectively reject system columns, so no separate test: */
if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
TSVECTOROID))
ereport(ERROR,
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 4d993e7371..461986cda3 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -1062,11 +1062,16 @@ plperl_build_tuple_result(HV *perlhash, TupleDesc td)
char *key = hek2cstr(he);
int attn = SPI_fnumber(td, key);
- if (attn <= 0 || td->attrs[attn - 1]->attisdropped)
+ if (attn == SPI_ERROR_NOATTRIBUTE)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("Perl hash contains nonexistent column \"%s\"",
key)));
+ if (attn <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot set system attribute \"%s\"",
+ key)));
values[attn - 1] = plperl_sv_to_datum(val,
td->attrs[attn - 1]->atttypid,
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 3e52113ee2..20809102ef 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -603,6 +603,7 @@ pltcl_init_load_unknown(Tcl_Interp *interp)
* leave this code as DString - it's only executed once per session
************************************************************/
fno = SPI_fnumber(SPI_tuptable->tupdesc, "modsrc");
+ Assert(fno > 0);
Tcl_DStringInit(&unknown_src);
@@ -1259,12 +1260,6 @@ pltcl_trigger_handler(PG_FUNCTION_ARGS, pltcl_call_state *call_state,
errmsg("cannot set system attribute \"%s\"",
ret_name)));
- /************************************************************
- * Ignore dropped columns
- ************************************************************/
- if (tupdesc->attrs[attnum - 1]->attisdropped)
- continue;
-
/************************************************************
* Lookup the attribute type's input function
************************************************************/
@@ -3077,10 +3072,6 @@ pltcl_build_tuple_result(Tcl_Interp *interp, Tcl_Obj **kvObjv, int kvObjc,
errmsg("cannot set system attribute \"%s\"",
fieldName)));
- /* Ignore dropped attributes */
- if (call_state->ret_tupdesc->attrs[attn - 1]->attisdropped)
- continue;
-
values[attn - 1] = utf_e2u(Tcl_GetString(kvObjv[i + 1]));
}
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index e7826a4513..119a59ab07 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -523,11 +523,12 @@ ttdummy(PG_FUNCTION_ARGS)
for (i = 0; i < 2; i++)
{
attnum[i] = SPI_fnumber(tupdesc, args[i]);
- if (attnum[i] < 0)
- elog(ERROR, "ttdummy (%s): there is no attribute %s", relname, args[i]);
+ if (attnum[i] <= 0)
+ elog(ERROR, "ttdummy (%s): there is no attribute %s",
+ relname, args[i]);
if (SPI_gettypeid(tupdesc, attnum[i]) != INT4OID)
- elog(ERROR, "ttdummy (%s): attributes %s and %s must be of abstime type",
- relname, args[0], args[1]);
+ elog(ERROR, "ttdummy (%s): attribute %s must be of integer type",
+ relname, args[i]);
}
oldon = SPI_getbinval(trigtuple, tupdesc, attnum[0], &isnull);
--
cgit v1.2.3
From 9257f0787257022e31c61cd77449127adfccf37f Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 8 Nov 2016 15:36:36 -0500
Subject: Replace uses of SPI_modifytuple that intend to allocate in current
context.
Invent a new function heap_modify_tuple_by_cols() that is functionally
equivalent to SPI_modifytuple except that it always allocates its result
by simple palloc. I chose however to make the API details a bit more
like heap_modify_tuple: pass a tupdesc rather than a Relation, and use
bool convention for the isnull array.
Use this function in place of SPI_modifytuple at all call sites where the
intended behavior is to allocate in current context. (There actually are
only two call sites left that depend on the old behavior, which makes me
wonder if we should just drop this function rather than keep it.)
This new function is easier to use than heap_modify_tuple() for purposes
of replacing a single column (or, really, any fixed number of columns).
There are a number of places where it would simplify the code to change
over, but I resisted that temptation for the moment ... everywhere except
in plpgsql's exec_assign_value(); changing that might offer some small
performance benefit, so I did it.
This is on the way to removing SPI_push/SPI_pop, but it seems like
good code cleanup in its own right.
Discussion: <9633.1478552022@sss.pgh.pa.us>
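A usage sketch of the new function, replacing a single column; oldtuple, tupdesc, and attnum (1-based) are assumed to be in scope, and the result is palloc'd in the current memory context:

int		colnum[1];
Datum	newval[1];
bool	newnull[1];
HeapTuple newtuple;

colnum[0] = attnum;
newval[0] = CStringGetTextDatum("replacement");
newnull[0] = false;

/* identity info (t_ctid, t_self, OID if any) is copied from oldtuple */
newtuple = heap_modify_tuple_by_cols(oldtuple, tupdesc,
									 1, colnum, newval, newnull);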
---
contrib/spi/autoinc.c | 13 ++++---
contrib/spi/insert_username.c | 12 +++----
contrib/spi/moddatetime.c | 21 ++++-------
contrib/spi/timetravel.c | 25 ++++++-------
doc/src/sgml/spi.sgml | 5 +--
src/backend/access/common/heaptuple.c | 66 +++++++++++++++++++++++++++++++++++
src/backend/utils/adt/tsvector_op.c | 16 ++++-----
src/include/access/htup_details.h | 6 ++++
src/pl/plpgsql/src/pl_exec.c | 57 ++++++++++--------------------
src/test/regress/regress.c | 11 ++----
10 files changed, 137 insertions(+), 95 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/spi/autoinc.c b/contrib/spi/autoinc.c
index fc657a7c06..54f85a3709 100644
--- a/contrib/spi/autoinc.c
+++ b/contrib/spi/autoinc.c
@@ -3,6 +3,7 @@
*/
#include "postgres.h"
+#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "commands/sequence.h"
#include "commands/trigger.h"
@@ -23,6 +24,7 @@ autoinc(PG_FUNCTION_ARGS)
int *chattrs; /* attnums of attributes to change */
int chnattrs = 0; /* # of above */
Datum *newvals; /* vals of above */
+ bool *newnulls; /* null flags for above */
char **args; /* arguments */
char *relname; /* triggered relation name */
Relation rel; /* triggered relation */
@@ -64,6 +66,7 @@ autoinc(PG_FUNCTION_ARGS)
chattrs = (int *) palloc(nargs / 2 * sizeof(int));
newvals = (Datum *) palloc(nargs / 2 * sizeof(Datum));
+ newnulls = (bool *) palloc(nargs / 2 * sizeof(bool));
for (i = 0; i < nargs;)
{
@@ -102,6 +105,7 @@ autoinc(PG_FUNCTION_ARGS)
newvals[chnattrs] = DirectFunctionCall1(nextval, seqname);
newvals[chnattrs] = Int32GetDatum((int32) DatumGetInt64(newvals[chnattrs]));
}
+ newnulls[chnattrs] = false;
pfree(DatumGetTextP(seqname));
chnattrs++;
i++;
@@ -109,16 +113,15 @@ autoinc(PG_FUNCTION_ARGS)
if (chnattrs > 0)
{
- rettuple = SPI_modifytuple(rel, rettuple, chnattrs, chattrs, newvals, NULL);
- if (rettuple == NULL)
- /* internal error */
- elog(ERROR, "autoinc (%s): %d returned by SPI_modifytuple",
- relname, SPI_result);
+ rettuple = heap_modify_tuple_by_cols(rettuple, tupdesc,
+ chnattrs, chattrs,
+ newvals, newnulls);
}
pfree(relname);
pfree(chattrs);
pfree(newvals);
+ pfree(newnulls);
return PointerGetDatum(rettuple);
}
diff --git a/contrib/spi/insert_username.c b/contrib/spi/insert_username.c
index 617c60a81c..a2e1747ff7 100644
--- a/contrib/spi/insert_username.c
+++ b/contrib/spi/insert_username.c
@@ -1,6 +1,4 @@
/*
- * insert_username.c
- * $Modified: Thu Oct 16 08:13:42 1997 by brook $
* contrib/spi/insert_username.c
*
* insert user name in response to a trigger
@@ -8,6 +6,7 @@
*/
#include "postgres.h"
+#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "executor/spi.h"
@@ -26,6 +25,7 @@ insert_username(PG_FUNCTION_ARGS)
Trigger *trigger; /* to get trigger name */
int nargs; /* # of arguments */
Datum newval; /* new value of column */
+ bool newnull; /* null flag */
char **args; /* arguments */
char *relname; /* triggered relation name */
Relation rel; /* triggered relation */
@@ -80,13 +80,11 @@ insert_username(PG_FUNCTION_ARGS)
/* create fields containing name */
newval = CStringGetTextDatum(GetUserNameFromId(GetUserId(), false));
+ newnull = false;
/* construct new tuple */
- rettuple = SPI_modifytuple(rel, rettuple, 1, &attnum, &newval, NULL);
- if (rettuple == NULL)
- /* internal error */
- elog(ERROR, "insert_username (\"%s\"): %d returned by SPI_modifytuple",
- relname, SPI_result);
+ rettuple = heap_modify_tuple_by_cols(rettuple, tupdesc,
+ 1, &attnum, &newval, &newnull);
pfree(relname);
diff --git a/contrib/spi/moddatetime.c b/contrib/spi/moddatetime.c
index cd700fe6d1..2d1f22c4e1 100644
--- a/contrib/spi/moddatetime.c
+++ b/contrib/spi/moddatetime.c
@@ -15,6 +15,7 @@ OH, me, I'm Terry Mackintosh
*/
#include "postgres.h"
+#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "executor/spi.h"
#include "commands/trigger.h"
@@ -34,6 +35,7 @@ moddatetime(PG_FUNCTION_ARGS)
int attnum; /* positional number of field to change */
Oid atttypid; /* type OID of field to change */
Datum newdt; /* The current datetime. */
+ bool newdtnull; /* null flag for it */
char **args; /* arguments */
char *relname; /* triggered relation name */
Relation rel; /* triggered relation */
@@ -115,22 +117,13 @@ moddatetime(PG_FUNCTION_ARGS)
args[0], relname)));
newdt = (Datum) 0; /* keep compiler quiet */
}
+ newdtnull = false;
-/* 1 is the number of items in the arrays attnum and newdt.
- attnum is the positional number of the field to be updated.
- newdt is the new datetime stamp.
- NOTE that attnum and newdt are not arrays, but then a 1 element array
- is not an array any more then they are. Thus, they can be considered a
- one element array.
-*/
- rettuple = SPI_modifytuple(rel, rettuple, 1, &attnum, &newdt, NULL);
-
- if (rettuple == NULL)
- /* internal error */
- elog(ERROR, "moddatetime (%s): %d returned by SPI_modifytuple",
- relname, SPI_result);
+ /* Replace the attnum'th column with newdt */
+ rettuple = heap_modify_tuple_by_cols(rettuple, tupdesc,
+ 1, &attnum, &newdt, &newdtnull);
-/* Clean up */
+ /* Clean up */
pfree(relname);
return PointerGetDatum(rettuple);
diff --git a/contrib/spi/timetravel.c b/contrib/spi/timetravel.c
index 30dcfd4d3e..2733aa231e 100644
--- a/contrib/spi/timetravel.c
+++ b/contrib/spi/timetravel.c
@@ -11,6 +11,7 @@
#include <ctype.h>
+#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "executor/spi.h"
@@ -183,13 +184,13 @@ timetravel(PG_FUNCTION_ARGS)
int chnattrs = 0;
int chattrs[MaxAttrNum];
Datum newvals[MaxAttrNum];
- char newnulls[MaxAttrNum];
+ bool newnulls[MaxAttrNum];
oldtimeon = SPI_getbinval(trigtuple, tupdesc, attnum[a_time_on], &isnull);
if (isnull)
{
newvals[chnattrs] = GetCurrentAbsoluteTime();
- newnulls[chnattrs] = ' ';
+ newnulls[chnattrs] = false;
chattrs[chnattrs] = attnum[a_time_on];
chnattrs++;
}
@@ -201,7 +202,7 @@ timetravel(PG_FUNCTION_ARGS)
(chnattrs > 0 && DatumGetInt32(newvals[a_time_on]) >= NOEND_ABSTIME))
elog(ERROR, "timetravel (%s): %s is infinity", relname, args[a_time_on]);
newvals[chnattrs] = NOEND_ABSTIME;
- newnulls[chnattrs] = ' ';
+ newnulls[chnattrs] = false;
chattrs[chnattrs] = attnum[a_time_off];
chnattrs++;
}
@@ -220,21 +221,23 @@ timetravel(PG_FUNCTION_ARGS)
{
/* clear update_user value */
newvals[chnattrs] = nulltext;
- newnulls[chnattrs] = 'n';
+ newnulls[chnattrs] = true;
chattrs[chnattrs] = attnum[a_upd_user];
chnattrs++;
/* clear delete_user value */
newvals[chnattrs] = nulltext;
- newnulls[chnattrs] = 'n';
+ newnulls[chnattrs] = true;
chattrs[chnattrs] = attnum[a_del_user];
chnattrs++;
/* set insert_user value */
newvals[chnattrs] = newuser;
- newnulls[chnattrs] = ' ';
+ newnulls[chnattrs] = false;
chattrs[chnattrs] = attnum[a_ins_user];
chnattrs++;
}
- rettuple = SPI_modifytuple(rel, trigtuple, chnattrs, chattrs, newvals, newnulls);
+ rettuple = heap_modify_tuple_by_cols(trigtuple, tupdesc,
+ chnattrs, chattrs,
+ newvals, newnulls);
return PointerGetDatum(rettuple);
/* end of INSERT */
}
@@ -395,13 +398,11 @@ timetravel(PG_FUNCTION_ARGS)
chnattrs++;
}
- rettuple = SPI_modifytuple(rel, newtuple, chnattrs, chattrs, newvals, newnulls);
-
/*
- * SPI_copytuple allocates tmptuple in upper executor context - have
- * to free allocation using SPI_pfree
+ * Use SPI_modifytuple() here because we are inside SPI environment
+ * but rettuple must be allocated in caller's context.
*/
- /* SPI_pfree(tmptuple); */
+ rettuple = SPI_modifytuple(rel, newtuple, chnattrs, chattrs, newvals, newnulls);
}
else
/* DELETE case */
diff --git a/doc/src/sgml/spi.sgml b/doc/src/sgml/spi.sgml
index 817a5d0120..39133c9038 100644
--- a/doc/src/sgml/spi.sgml
+++ b/doc/src/sgml/spi.sgml
@@ -3382,8 +3382,9 @@ char * SPI_getnspname(Relation rel)
repalloc, or SPI utility functions (except for
SPI_copytuple,
SPI_returntuple,
- SPI_modifytuple, and
- SPI_palloc) are made in this context. When a
+ SPI_modifytuple,
+ SPI_palloc, and
+ SPI_datumTransfer) are made in this context. When a
procedure disconnects from the SPI manager (via
SPI_finish) the current context is restored to
the upper executor context, and all allocations made in the
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 6d0f3f3767..e27ec78b71 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -846,6 +846,72 @@ heap_modify_tuple(HeapTuple tuple,
return newTuple;
}
+/*
+ * heap_modify_tuple_by_cols
+ * form a new tuple from an old tuple and a set of replacement values.
+ *
+ * This is like heap_modify_tuple, except that instead of specifying which
+ * column(s) to replace by a boolean map, an array of target column numbers
+ * is used. This is often more convenient when a fixed number of columns
+ * are to be replaced. The replCols, replValues, and replIsnull arrays must
+ * be of length nCols. Target column numbers are indexed from 1.
+ *
+ * The result is allocated in the current memory context.
+ */
+HeapTuple
+heap_modify_tuple_by_cols(HeapTuple tuple,
+ TupleDesc tupleDesc,
+ int nCols,
+ int *replCols,
+ Datum *replValues,
+ bool *replIsnull)
+{
+ int numberOfAttributes = tupleDesc->natts;
+ Datum *values;
+ bool *isnull;
+ HeapTuple newTuple;
+ int i;
+
+ /*
+ * allocate and fill values and isnull arrays from the tuple, then replace
+ * selected columns from the input arrays.
+ */
+ values = (Datum *) palloc(numberOfAttributes * sizeof(Datum));
+ isnull = (bool *) palloc(numberOfAttributes * sizeof(bool));
+
+ heap_deform_tuple(tuple, tupleDesc, values, isnull);
+
+ for (i = 0; i < nCols; i++)
+ {
+ int attnum = replCols[i];
+
+ if (attnum <= 0 || attnum > numberOfAttributes)
+ elog(ERROR, "invalid column number %d", attnum);
+ values[attnum - 1] = replValues[i];
+ isnull[attnum - 1] = replIsnull[i];
+ }
+
+ /*
+ * create a new tuple from the values and isnull arrays
+ */
+ newTuple = heap_form_tuple(tupleDesc, values, isnull);
+
+ pfree(values);
+ pfree(isnull);
+
+ /*
+ * copy the identification info of the old tuple: t_ctid, t_self, and OID
+ * (if any)
+ */
+ newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
+ newTuple->t_self = tuple->t_self;
+ newTuple->t_tableOid = tuple->t_tableOid;
+ if (tupleDesc->tdhasoid)
+ HeapTupleSetOid(newTuple, HeapTupleGetOid(tuple));
+
+ return newTuple;
+}
+
/*
* heap_deform_tuple
* Given a tuple, extract data into values/isnull arrays; this is
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 0e9ae5ff9c..c9d5060f2c 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -2329,8 +2329,10 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
if (prs.curwords)
{
datum = PointerGetDatum(make_tsvector(&prs));
- rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
- &datum, NULL);
+ isnull = false;
+ rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+ 1, &tsvector_attr_num,
+ &datum, &isnull);
pfree(DatumGetPointer(datum));
}
else
@@ -2340,14 +2342,12 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
SET_VARSIZE(out, CALCDATASIZE(0, 0));
out->size = 0;
datum = PointerGetDatum(out);
- rettuple = SPI_modifytuple(rel, rettuple, 1, &tsvector_attr_num,
- &datum, NULL);
+ isnull = false;
+ rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+ 1, &tsvector_attr_num,
+ &datum, &isnull);
pfree(prs.words);
}
- if (rettuple == NULL) /* internal error */
- elog(ERROR, "tsvector_update_trigger: %d returned by SPI_modifytuple",
- SPI_result);
-
return PointerGetDatum(rettuple);
}
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index d7e5fad11e..8fb1f6ddea 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -805,6 +805,12 @@ extern HeapTuple heap_modify_tuple(HeapTuple tuple,
Datum *replValues,
bool *replIsnull,
bool *doReplace);
+extern HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple,
+ TupleDesc tupleDesc,
+ int nCols,
+ int *replCols,
+ Datum *replValues,
+ bool *replIsnull);
extern void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
Datum *values, bool *isnull);
extern void heap_freetuple(HeapTuple htup);
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 042b31fd77..91e1f8dd3f 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -4562,10 +4562,9 @@ exec_assign_value(PLpgSQL_execstate *estate,
PLpgSQL_rec *rec;
int fno;
HeapTuple newtup;
- int natts;
- Datum *values;
- bool *nulls;
- bool *replaces;
+ int colnums[1];
+ Datum values[1];
+ bool nulls[1];
Oid atttype;
int32 atttypmod;
@@ -4584,9 +4583,8 @@ exec_assign_value(PLpgSQL_execstate *estate,
errdetail("The tuple structure of a not-yet-assigned record is indeterminate.")));
/*
- * Get the number of the records field to change and the
- * number of attributes in the tuple. Note: disallow system
- * column names because the code below won't cope.
+ * Get the number of the record field to change. Disallow
+ * system columns because the code below won't cope.
*/
fno = SPI_fnumber(rec->tupdesc, recfield->fieldname);
if (fno <= 0)
@@ -4594,42 +4592,25 @@ exec_assign_value(PLpgSQL_execstate *estate,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("record \"%s\" has no field \"%s\"",
rec->refname, recfield->fieldname)));
- fno--;
- natts = rec->tupdesc->natts;
-
- /*
- * Set up values/control arrays for heap_modify_tuple. For all
- * the attributes except the one we want to replace, use the
- * value that's in the old tuple.
- */
- values = eval_mcontext_alloc(estate, sizeof(Datum) * natts);
- nulls = eval_mcontext_alloc(estate, sizeof(bool) * natts);
- replaces = eval_mcontext_alloc(estate, sizeof(bool) * natts);
-
- memset(replaces, false, sizeof(bool) * natts);
- replaces[fno] = true;
+ colnums[0] = fno;
/*
* Now insert the new value, being careful to cast it to the
* right type.
*/
- atttype = rec->tupdesc->attrs[fno]->atttypid;
- atttypmod = rec->tupdesc->attrs[fno]->atttypmod;
- values[fno] = exec_cast_value(estate,
- value,
- &isNull,
- valtype,
- valtypmod,
- atttype,
- atttypmod);
- nulls[fno] = isNull;
-
- /*
- * Now call heap_modify_tuple() to create a new tuple that
- * replaces the old one in the record.
- */
- newtup = heap_modify_tuple(rec->tup, rec->tupdesc,
- values, nulls, replaces);
+ atttype = rec->tupdesc->attrs[fno - 1]->atttypid;
+ atttypmod = rec->tupdesc->attrs[fno - 1]->atttypmod;
+ values[0] = exec_cast_value(estate,
+ value,
+ &isNull,
+ valtype,
+ valtypmod,
+ atttype,
+ atttypmod);
+ nulls[0] = isNull;
+
+ newtup = heap_modify_tuple_by_cols(rec->tup, rec->tupdesc,
+ 1, colnums, values, nulls);
if (rec->freetup)
heap_freetuple(rec->tup);
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 119a59ab07..32703fcdcf 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -639,15 +639,8 @@ ttdummy(PG_FUNCTION_ARGS)
/* Tuple to return to upper Executor ... */
if (newtuple) /* UPDATE */
- {
- HeapTuple tmptuple;
-
- tmptuple = SPI_copytuple(trigtuple);
- rettuple = SPI_modifytuple(rel, tmptuple, 1, &(attnum[1]), &newoff, NULL);
- SPI_freetuple(tmptuple);
- }
- else
- /* DELETE */
+ rettuple = SPI_modifytuple(rel, trigtuple, 1, &(attnum[1]), &newoff, NULL);
+ else /* DELETE */
rettuple = trigtuple;
SPI_finish(); /* don't forget say Bye to SPI mgr */
--
cgit v1.2.3
From 1833f1a1c3b0e12b3ea40d49bf11898eedae5248 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 8 Nov 2016 17:39:45 -0500
Subject: Simplify code by getting rid of SPI_push, SPI_pop,
SPI_restore_connection.
The idea behind SPI_push was to allow transitioning back into an
"unconnected" state when a SPI-using procedure calls unrelated code that
might or might not invoke SPI. That sounds good, but in practice the only
thing it does for us is to catch cases where a called SPI-using function
forgets to call SPI_connect --- which is a highly improbable failure mode,
since it would be exposed immediately by direct testing of said function.
As against that, we've had multiple bugs induced by forgetting to call
SPI_push/SPI_pop around code that might invoke SPI-using functions; these
are much harder to catch and indeed have gone undetected for years in some
cases. And we've had to band-aid around some problems of this ilk by
introducing conditional push/pop pairs in some places, which really kind
of defeats the purpose altogether; if we can't draw bright lines between
connected and unconnected code, what's the point?
Hence, get rid of SPI_push[_conditional], SPI_pop[_conditional], and the
underlying state variable _SPI_curid. It turns out SPI_restore_connection
can go away too, which is a nice side benefit since it was never more than
a kluge. Provide no-op macros for the deleted functions so as to avoid an
API break for external modules.
A side effect of this removal is that SPI_palloc and allied functions no
longer permit being called when unconnected; they'll throw an error
instead. The apparent usefulness of the previous behavior was a mirage
as well, because it was depended on by only a few places (which I fixed in
preceding commits), and it posed a risk of allocations being unexpectedly
long-lived if someone forgot a SPI_push call.
Discussion: <20808.1478481403@sss.pgh.pa.us>
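A before/after sketch of the simplification at a typical call site; flinfo and arg are assumed to be set up elsewhere, and per this commit the deleted functions survive as no-op macros:

/* Before: SPI-using code had to bracket calls that might recurse into SPI */
SPI_push();
result = FunctionCall1(&flinfo, arg);	/* callee may do its own SPI_connect */
SPI_pop();

/* After: the bracketing is unnecessary; nested SPI users simply call
 * SPI_connect/SPI_finish themselves, stacking a new connection. */
result = FunctionCall1(&flinfo, arg);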
---
doc/src/sgml/spi.sgml | 180 ++++++++-----------------------
src/backend/executor/spi.c | 200 +++++++++--------------------------
src/backend/utils/adt/xml.c | 6 --
src/backend/utils/cache/plancache.c | 13 ---
src/backend/utils/fmgr/fmgr.c | 48 +--------
src/include/executor/spi.h | 12 ++-
src/pl/plperl/plperl.c | 78 --------------
src/pl/plpgsql/src/pl_exec.c | 21 ----
src/pl/plpython/plpy_exec.c | 2 -
src/pl/plpython/plpy_spi.c | 13 ---
src/pl/plpython/plpy_subxactobject.c | 7 --
src/pl/tcl/pltcl.c | 18 ----
12 files changed, 105 insertions(+), 493 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/spi.sgml b/doc/src/sgml/spi.sgml
index 39133c9038..836ce0822f 100644
--- a/doc/src/sgml/spi.sgml
+++ b/doc/src/sgml/spi.sgml
@@ -90,21 +90,6 @@ int SPI_connect(void)
function if you want to execute commands through SPI. Some utility
SPI functions can be called from unconnected procedures.
-
-
- If your procedure is already connected,
- SPI_connect will return the error code
- SPI_ERROR_CONNECT. This could happen if
- a procedure that has called SPI_connect
- directly calls another procedure that calls
- SPI_connect. While recursive calls to the
- SPI manager are permitted when an SQL command
- called through SPI invokes another function that uses
- SPI, directly nested calls to
- SPI_connect and
- SPI_finish are forbidden.
- (But see SPI_push and SPI_pop.)
-
@@ -164,13 +149,6 @@ int SPI_finish(void)
abort the transaction via elog(ERROR). In that
case SPI will clean itself up automatically.
-
-
- If SPI_finish is called without having a valid
- connection, it will return SPI_ERROR_UNCONNECTED.
- There is no fundamental problem with this; it means that the SPI
- manager has nothing to do.
-
@@ -200,86 +178,6 @@ int SPI_finish(void)
-
- SPI_push
-
-
- SPI_push
- 3
-
-
-
- SPI_push
- push SPI stack to allow recursive SPI usage
-
-
-
-
-void SPI_push(void)
-
-
-
-
- Description
-
-
- SPI_push should be called before executing another
- procedure that might itself wish to use SPI.
- After SPI_push, SPI is no longer in a
- connected> state, and SPI function calls will be rejected unless
- a fresh SPI_connect is done. This ensures a clean
- separation between your procedure's SPI state and that of another procedure
- you call. After the other procedure returns, call
- SPI_pop to restore access to your own SPI state.
-
-
-
- Note that SPI_execute and related functions
- automatically do the equivalent of SPI_push before
- passing control back to the SQL execution engine, so it is not necessary
- for you to worry about this when using those functions.
- Only when you are directly calling arbitrary code that might contain
- SPI_connect calls do you need to issue
- SPI_push and SPI_pop.
-
-
-
-
-
-
-
-
- SPI_pop
-
-
- SPI_pop
- 3
-
-
-
- SPI_pop
- pop SPI stack to return from recursive SPI usage
-
-
-
-
-void SPI_pop(void)
-
-
-
-
- Description
-
-
- SPI_pop pops the previous environment from the
- SPI call stack. See SPI_push.
-
-
-
-
-
-
-
SPI_execute
@@ -3361,17 +3259,8 @@ char * SPI_getnspname(Relation rel)
upper executor context, that is, the memory context
that was current when SPI_connect was called,
which is precisely the right context for a value returned from your
- procedure.
-
-
-
- If SPI_palloc is called while the procedure is
- not connected to SPI, then it acts the same as a normal
- palloc. Before a procedure connects to the
- SPI manager, the current memory context is the upper executor
- context, so all allocations made by the procedure via
- palloc or by SPI utility functions are made in
- this context.
+ procedure. Several of the other utility procedures described in
+ this section also return objects created in the upper executor context.
@@ -3379,25 +3268,14 @@ char * SPI_getnspname(Relation rel)
context of the procedure, which is created by
SPI_connect, is made the current context. All
allocations made by palloc,
- repalloc, or SPI utility functions (except for
- SPI_copytuple,
- SPI_returntuple,
- SPI_modifytuple,
- SPI_palloc, and
- SPI_datumTransfer) are made in this context. When a
+ repalloc, or SPI utility functions (except as
+ described in this section) are made in this context. When a
procedure disconnects from the SPI manager (via
SPI_finish) the current context is restored to
the upper executor context, and all allocations made in the
procedure memory context are freed and cannot be used any more.
-
- All functions described in this section can be used by both
- connected and unconnected procedures. In an unconnected procedure,
- they act the same as the underlying ordinary server functions
- (palloc>, etc.).
-
-
@@ -3426,6 +3304,11 @@ void * SPI_palloc(Size size)
SPI_palloc allocates memory in the upper
executor context.
+
+
+ This function can only be used while connected to SPI.
+ Otherwise, it throws an error.
+
@@ -3605,6 +3488,12 @@ HeapTuple SPI_copytuple(HeapTuple row)
row from a trigger. In a function declared to return a composite
type, use SPI_returntuple instead.
+
+
+ This function can only be used while connected to SPI.
+ Otherwise, it returns NULL and sets SPI_result to
+ SPI_ERROR_UNCONNECTED.
+
@@ -3626,8 +3515,8 @@ HeapTuple SPI_copytuple(HeapTuple row)
Return Value
- the copied row; NULL only if
- tuple is NULL
+ the copied row, or NULL on error
+ (see SPI_result for an error indication)
@@ -3663,6 +3552,12 @@ HeapTupleHeader SPI_returntuple(HeapTuple row, TupleDesc
before returning.
+
+ This function can only be used while connected to SPI.
+ Otherwise, it returns NULL and sets SPI_result to
+ SPI_ERROR_UNCONNECTED.
+
+
Note that this should be used for functions that are declared to return
composite types. It is not used for triggers; use
@@ -3699,10 +3594,9 @@ HeapTupleHeader SPI_returntuple(HeapTuple row, TupleDesc
Return Value
- HeapTupleHeader pointing to copied row;
- NULL only if
- row or rowdesc is
- NULL
+ HeapTupleHeader pointing to copied row,
+ or NULL on error
+ (see SPI_result for an error indication)
@@ -3736,6 +3630,13 @@ HeapTuple SPI_modifytuple(Relation rel, HeapTuple SPI_modifytuple creates a new row by
substituting new values for selected columns, copying the original
row's columns at other positions. The input row is not modified.
+ The new row is returned in the upper executor context.
+
+
+
+ This function can only be used while connected to SPI.
+ Otherwise, it returns NULL and sets SPI_result to
+ SPI_ERROR_UNCONNECTED.
@@ -3821,8 +3722,8 @@ HeapTuple SPI_modifytuple(Relation rel, HeapTuple
new row with modifications, allocated in the upper executor
- context; NULL only if row
- is NULL
+ context, or NULL on error
+ (see SPI_result for an error indication)
@@ -3845,11 +3746,20 @@ HeapTuple SPI_modifytuple(Relation rel, HeapTuple
if colnum> contains an invalid column number (less
- than or equal to 0 or greater than the number of column in
+ than or equal to 0 or greater than the number of columns in
row>)
+
+
+ SPI_ERROR_UNCONNECTED
+
+
+ if SPI is not active
+
+
+
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 8e650bc412..80fc4c4725 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -44,8 +44,7 @@ int SPI_result;
static _SPI_connection *_SPI_stack = NULL;
static _SPI_connection *_SPI_current = NULL;
static int _SPI_stack_depth = 0; /* allocated size of _SPI_stack */
-static int _SPI_connected = -1;
-static int _SPI_curid = -1;
+static int _SPI_connected = -1; /* current stack index */
static Portal SPI_cursor_open_internal(const char *name, SPIPlanPtr plan,
ParamListInfo paramLI, bool read_only);
@@ -86,13 +85,7 @@ SPI_connect(void)
{
int newdepth;
- /*
- * When procedure called by Executor _SPI_curid expected to be equal to
- * _SPI_connected
- */
- if (_SPI_curid != _SPI_connected)
- return SPI_ERROR_CONNECT;
-
+ /* Enlarge stack if necessary */
if (_SPI_stack == NULL)
{
if (_SPI_connected != -1 || _SPI_stack_depth != 0)
@@ -117,9 +110,7 @@ SPI_connect(void)
}
}
- /*
- * We're entering procedure where _SPI_curid == _SPI_connected - 1
- */
+ /* Enter new stack level */
_SPI_connected++;
Assert(_SPI_connected >= 0 && _SPI_connected < _SPI_stack_depth);
@@ -178,14 +169,9 @@ SPI_finish(void)
SPI_lastoid = InvalidOid;
SPI_tuptable = NULL;
- /*
- * After _SPI_begin_call _SPI_connected == _SPI_curid. Now we are closing
- * connection to SPI and returning to upper Executor and so _SPI_connected
- * must be equal to _SPI_curid.
- */
+ /* Exit stack level */
_SPI_connected--;
- _SPI_curid--;
- if (_SPI_connected == -1)
+ if (_SPI_connected < 0)
_SPI_current = NULL;
else
_SPI_current = &(_SPI_stack[_SPI_connected]);
@@ -212,7 +198,7 @@ AtEOXact_SPI(bool isCommit)
_SPI_current = _SPI_stack = NULL;
_SPI_stack_depth = 0;
- _SPI_connected = _SPI_curid = -1;
+ _SPI_connected = -1;
SPI_processed = 0;
SPI_lastoid = InvalidOid;
SPI_tuptable = NULL;
@@ -258,8 +244,7 @@ AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid)
* be already gone.
*/
_SPI_connected--;
- _SPI_curid = _SPI_connected;
- if (_SPI_connected == -1)
+ if (_SPI_connected < 0)
_SPI_current = NULL;
else
_SPI_current = &(_SPI_stack[_SPI_connected]);
@@ -313,53 +298,6 @@ AtEOSubXact_SPI(bool isCommit, SubTransactionId mySubid)
}
-/* Pushes SPI stack to allow recursive SPI calls */
-void
-SPI_push(void)
-{
- _SPI_curid++;
-}
-
-/* Pops SPI stack to allow recursive SPI calls */
-void
-SPI_pop(void)
-{
- _SPI_curid--;
-}
-
-/* Conditional push: push only if we're inside a SPI procedure */
-bool
-SPI_push_conditional(void)
-{
- bool pushed = (_SPI_curid != _SPI_connected);
-
- if (pushed)
- {
- _SPI_curid++;
- /* We should now be in a state where SPI_connect would succeed */
- Assert(_SPI_curid == _SPI_connected);
- }
- return pushed;
-}
-
-/* Conditional pop: pop only if SPI_push_conditional pushed */
-void
-SPI_pop_conditional(bool pushed)
-{
- /* We should be in a state where SPI_connect would succeed */
- Assert(_SPI_curid == _SPI_connected);
- if (pushed)
- _SPI_curid--;
-}
-
-/* Restore state of SPI stack after aborting a subtransaction */
-void
-SPI_restore_connection(void)
-{
- Assert(_SPI_connected >= 0);
- _SPI_curid = _SPI_connected - 1;
-}
-
/* Parse, plan, and execute a query string */
int
SPI_execute(const char *src, bool read_only, long tcount)
@@ -691,7 +629,7 @@ SPI_freeplan(SPIPlanPtr plan)
HeapTuple
SPI_copytuple(HeapTuple tuple)
{
- MemoryContext oldcxt = NULL;
+ MemoryContext oldcxt;
HeapTuple ctuple;
if (tuple == NULL)
@@ -700,17 +638,17 @@ SPI_copytuple(HeapTuple tuple)
return NULL;
}
- if (_SPI_curid + 1 == _SPI_connected) /* connected */
+ if (_SPI_current == NULL)
{
- if (_SPI_current != &(_SPI_stack[_SPI_curid + 1]))
- elog(ERROR, "SPI stack corrupted");
- oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
+ SPI_result = SPI_ERROR_UNCONNECTED;
+ return NULL;
}
+ oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
+
ctuple = heap_copytuple(tuple);
- if (oldcxt)
- MemoryContextSwitchTo(oldcxt);
+ MemoryContextSwitchTo(oldcxt);
return ctuple;
}
@@ -718,7 +656,7 @@ SPI_copytuple(HeapTuple tuple)
HeapTupleHeader
SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc)
{
- MemoryContext oldcxt = NULL;
+ MemoryContext oldcxt;
HeapTupleHeader dtup;
if (tuple == NULL || tupdesc == NULL)
@@ -727,22 +665,22 @@ SPI_returntuple(HeapTuple tuple, TupleDesc tupdesc)
return NULL;
}
+ if (_SPI_current == NULL)
+ {
+ SPI_result = SPI_ERROR_UNCONNECTED;
+ return NULL;
+ }
+
/* For RECORD results, make sure a typmod has been assigned */
if (tupdesc->tdtypeid == RECORDOID &&
tupdesc->tdtypmod < 0)
assign_record_type_typmod(tupdesc);
- if (_SPI_curid + 1 == _SPI_connected) /* connected */
- {
- if (_SPI_current != &(_SPI_stack[_SPI_curid + 1]))
- elog(ERROR, "SPI stack corrupted");
- oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
- }
+ oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
dtup = DatumGetHeapTupleHeader(heap_copy_tuple_as_datum(tuple, tupdesc));
- if (oldcxt)
- MemoryContextSwitchTo(oldcxt);
+ MemoryContextSwitchTo(oldcxt);
return dtup;
}
@@ -751,7 +689,7 @@ HeapTuple
SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum,
Datum *Values, const char *Nulls)
{
- MemoryContext oldcxt = NULL;
+ MemoryContext oldcxt;
HeapTuple mtuple;
int numberOfAttributes;
Datum *v;
@@ -764,13 +702,16 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum,
return NULL;
}
- if (_SPI_curid + 1 == _SPI_connected) /* connected */
+ if (_SPI_current == NULL)
{
- if (_SPI_current != &(_SPI_stack[_SPI_curid + 1]))
- elog(ERROR, "SPI stack corrupted");
- oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
+ SPI_result = SPI_ERROR_UNCONNECTED;
+ return NULL;
}
+
+ oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
+
SPI_result = 0;
+
numberOfAttributes = rel->rd_att->natts;
v = (Datum *) palloc(numberOfAttributes * sizeof(Datum));
n = (bool *) palloc(numberOfAttributes * sizeof(bool));
@@ -810,8 +751,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum,
pfree(v);
pfree(n);
- if (oldcxt)
- MemoryContextSwitchTo(oldcxt);
+ MemoryContextSwitchTo(oldcxt);
return mtuple;
}
@@ -980,22 +920,10 @@ SPI_getnspname(Relation rel)
void *
SPI_palloc(Size size)
{
- MemoryContext oldcxt = NULL;
- void *pointer;
-
- if (_SPI_curid + 1 == _SPI_connected) /* connected */
- {
- if (_SPI_current != &(_SPI_stack[_SPI_curid + 1]))
- elog(ERROR, "SPI stack corrupted");
- oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
- }
-
- pointer = palloc(size);
-
- if (oldcxt)
- MemoryContextSwitchTo(oldcxt);
+ if (_SPI_current == NULL)
+ elog(ERROR, "SPI_palloc called while not connected to SPI");
- return pointer;
+ return MemoryContextAlloc(_SPI_current->savedcxt, size);
}
void *
@@ -1015,20 +943,17 @@ SPI_pfree(void *pointer)
Datum
SPI_datumTransfer(Datum value, bool typByVal, int typLen)
{
- MemoryContext oldcxt = NULL;
+ MemoryContext oldcxt;
Datum result;
- if (_SPI_curid + 1 == _SPI_connected) /* connected */
- {
- if (_SPI_current != &(_SPI_stack[_SPI_curid + 1]))
- elog(ERROR, "SPI stack corrupted");
- oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
- }
+ if (_SPI_current == NULL)
+ elog(ERROR, "SPI_datumTransfer called while not connected to SPI");
+
+ oldcxt = MemoryContextSwitchTo(_SPI_current->savedcxt);
result = datumTransfer(value, typByVal, typLen);
- if (oldcxt)
- MemoryContextSwitchTo(oldcxt);
+ MemoryContextSwitchTo(oldcxt);
return result;
}
@@ -1050,17 +975,12 @@ SPI_freetuptable(SPITupleTable *tuptable)
return;
/*
- * Since this function might be called during error recovery, it seems
- * best not to insist that the caller be actively connected. We just
- * search the topmost SPI context, connected or not.
+ * Search only the topmost SPI context for a matching tuple table.
*/
- if (_SPI_connected >= 0)
+ if (_SPI_current != NULL)
{
slist_mutable_iter siter;
- if (_SPI_current != &(_SPI_stack[_SPI_connected]))
- elog(ERROR, "SPI stack corrupted");
-
/* find tuptable in active list, then remove it */
slist_foreach_modify(siter, &_SPI_current->tuptables)
{
@@ -1168,13 +1088,9 @@ SPI_cursor_open_with_args(const char *name,
/* We needn't copy the plan; SPI_cursor_open_internal will do so */
- /* Adjust stack so that SPI_cursor_open_internal doesn't complain */
- _SPI_curid--;
-
result = SPI_cursor_open_internal(name, &plan, paramLI, read_only);
/* And clean up */
- _SPI_curid++;
_SPI_end_call(true);
return result;
@@ -1723,14 +1639,8 @@ spi_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
MemoryContext oldcxt;
MemoryContext tuptabcxt;
- /*
- * When called by Executor _SPI_curid expected to be equal to
- * _SPI_connected
- */
- if (_SPI_curid != _SPI_connected || _SPI_connected < 0)
- elog(ERROR, "improper call to spi_dest_startup");
- if (_SPI_current != &(_SPI_stack[_SPI_curid]))
- elog(ERROR, "SPI stack corrupted");
+ if (_SPI_current == NULL)
+ elog(ERROR, "spi_dest_startup called while not connected to SPI");
if (_SPI_current->tuptable != NULL)
elog(ERROR, "improper call to spi_dest_startup");
@@ -1775,14 +1685,8 @@ spi_printtup(TupleTableSlot *slot, DestReceiver *self)
SPITupleTable *tuptable;
MemoryContext oldcxt;
- /*
- * When called by Executor _SPI_curid expected to be equal to
- * _SPI_connected
- */
- if (_SPI_curid != _SPI_connected || _SPI_connected < 0)
- elog(ERROR, "improper call to spi_printtup");
- if (_SPI_current != &(_SPI_stack[_SPI_curid]))
- elog(ERROR, "SPI stack corrupted");
+ if (_SPI_current == NULL)
+ elog(ERROR, "spi_printtup called while not connected to SPI");
tuptable = _SPI_current->tuptable;
if (tuptable == NULL)
@@ -2534,11 +2438,8 @@ _SPI_procmem(void)
static int
_SPI_begin_call(bool execmem)
{
- if (_SPI_curid + 1 != _SPI_connected)
+ if (_SPI_current == NULL)
return SPI_ERROR_UNCONNECTED;
- _SPI_curid++;
- if (_SPI_current != &(_SPI_stack[_SPI_curid]))
- elog(ERROR, "SPI stack corrupted");
if (execmem) /* switch to the Executor memory context */
_SPI_execmem();
@@ -2554,11 +2455,6 @@ _SPI_begin_call(bool execmem)
static int
_SPI_end_call(bool procmem)
{
- /*
- * We're returning to procedure where _SPI_curid == _SPI_connected - 1
- */
- _SPI_curid--;
-
if (procmem) /* switch to the procedure memory context */
{
_SPI_procmem();
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index b144920ec6..057c3bfd7c 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -2644,8 +2644,6 @@ schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
relid_list = schema_get_xml_visible_tables(nspid);
- SPI_push();
-
foreach(cell, relid_list)
{
Oid relid = lfirst_oid(cell);
@@ -2658,7 +2656,6 @@ schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
appendStringInfoChar(result, '\n');
}
- SPI_pop();
SPI_finish();
xmldata_root_element_end(result, xmlsn);
@@ -2822,8 +2819,6 @@ database_to_xml_internal(const char *xmlschema, bool nulls,
nspid_list = database_get_xml_visible_schemas();
- SPI_push();
-
foreach(cell, nspid_list)
{
Oid nspid = lfirst_oid(cell);
@@ -2836,7 +2831,6 @@ database_to_xml_internal(const char *xmlschema, bool nulls,
appendStringInfoChar(result, '\n');
}
- SPI_pop();
SPI_finish();
xmldata_root_element_end(result, xmlcn);
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index c96a86500a..884cdab702 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -53,7 +53,6 @@
#include "access/transam.h"
#include "catalog/namespace.h"
#include "executor/executor.h"
-#include "executor/spi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
@@ -878,7 +877,6 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
CachedPlan *plan;
List *plist;
bool snapshot_set;
- bool spi_pushed;
bool is_transient;
MemoryContext plan_context;
MemoryContext oldcxt = CurrentMemoryContext;
@@ -926,22 +924,11 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
snapshot_set = true;
}
- /*
- * The planner may try to call SPI-using functions, which causes a problem
- * if we're already inside one. Rather than expect all SPI-using code to
- * do SPI_push whenever a replan could happen, it seems best to take care
- * of the case here.
- */
- spi_pushed = SPI_push_conditional();
-
/*
* Generate the plan.
*/
plist = pg_plan_queries(qlist, plansource->cursor_options, boundParams);
- /* Clean up SPI state */
- SPI_pop_conditional(spi_pushed);
-
/* Release snapshot if we got one */
if (snapshot_set)
PopActiveSnapshot();
diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c
index 46a55ba7b9..3340b17d90 100644
--- a/src/backend/utils/fmgr/fmgr.c
+++ b/src/backend/utils/fmgr/fmgr.c
@@ -19,7 +19,6 @@
#include "catalog/pg_language.h"
#include "catalog/pg_proc.h"
#include "executor/functions.h"
-#include "executor/spi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
@@ -1878,25 +1877,16 @@ OidFunctionCall9Coll(Oid functionId, Oid collation, Datum arg1, Datum arg2,
* the caller should assume the result is NULL, but we'll call the input
* function anyway if it's not strict. So this is almost but not quite
* the same as FunctionCall3.
- *
- * One important difference from the bare function call is that we will
- * push any active SPI context, allowing SPI-using I/O functions to be
- * called from other SPI functions without extra notation. This is a hack,
- * but the alternative of expecting all SPI functions to do SPI_push/SPI_pop
- * around I/O calls seems worse.
*/
Datum
InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
{
FunctionCallInfoData fcinfo;
Datum result;
- bool pushed;
if (str == NULL && flinfo->fn_strict)
return (Datum) 0; /* just return null result */
- pushed = SPI_push_conditional();
-
InitFunctionCallInfoData(fcinfo, flinfo, 3, InvalidOid, NULL, NULL);
fcinfo.arg[0] = CStringGetDatum(str);
@@ -1922,8 +1912,6 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
fcinfo.flinfo->fn_oid);
}
- SPI_pop_conditional(pushed);
-
return result;
}
@@ -1932,22 +1920,12 @@ InputFunctionCall(FmgrInfo *flinfo, char *str, Oid typioparam, int32 typmod)
*
* Do not call this on NULL datums.
*
- * This is almost just window dressing for FunctionCall1, but it includes
- * SPI context pushing for the same reasons as InputFunctionCall.
+ * This is currently little more than window dressing for FunctionCall1.
*/
char *
OutputFunctionCall(FmgrInfo *flinfo, Datum val)
{
- char *result;
- bool pushed;
-
- pushed = SPI_push_conditional();
-
- result = DatumGetCString(FunctionCall1(flinfo, val));
-
- SPI_pop_conditional(pushed);
-
- return result;
+ return DatumGetCString(FunctionCall1(flinfo, val));
}
/*
@@ -1956,8 +1934,7 @@ OutputFunctionCall(FmgrInfo *flinfo, Datum val)
* "buf" may be NULL to indicate we are reading a NULL. In this case
* the caller should assume the result is NULL, but we'll call the receive
* function anyway if it's not strict. So this is almost but not quite
- * the same as FunctionCall3. Also, this includes SPI context pushing for
- * the same reasons as InputFunctionCall.
+ * the same as FunctionCall3.
*/
Datum
ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf,
@@ -1965,13 +1942,10 @@ ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf,
{
FunctionCallInfoData fcinfo;
Datum result;
- bool pushed;
if (buf == NULL && flinfo->fn_strict)
return (Datum) 0; /* just return null result */
- pushed = SPI_push_conditional();
-
InitFunctionCallInfoData(fcinfo, flinfo, 3, InvalidOid, NULL, NULL);
fcinfo.arg[0] = PointerGetDatum(buf);
@@ -1997,8 +1971,6 @@ ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf,
fcinfo.flinfo->fn_oid);
}
- SPI_pop_conditional(pushed);
-
return result;
}
@@ -2009,22 +1981,12 @@ ReceiveFunctionCall(FmgrInfo *flinfo, StringInfo buf,
*
* This is little more than window dressing for FunctionCall1, but it does
* guarantee a non-toasted result, which strictly speaking the underlying
- * function doesn't. Also, this includes SPI context pushing for the same
- * reasons as InputFunctionCall.
+ * function doesn't.
*/
bytea *
SendFunctionCall(FmgrInfo *flinfo, Datum val)
{
- bytea *result;
- bool pushed;
-
- pushed = SPI_push_conditional();
-
- result = DatumGetByteaP(FunctionCall1(flinfo, val));
-
- SPI_pop_conditional(pushed);
-
- return result;
+ return DatumGetByteaP(FunctionCall1(flinfo, val));
}
/*
diff --git a/src/include/executor/spi.h b/src/include/executor/spi.h
index 1792fb1217..76ba394a2b 100644
--- a/src/include/executor/spi.h
+++ b/src/include/executor/spi.h
@@ -59,6 +59,13 @@ typedef struct _SPI_plan *SPIPlanPtr;
#define SPI_OK_UPDATE_RETURNING 13
#define SPI_OK_REWRITTEN 14
+/* These used to be functions, now just no-ops for backwards compatibility */
+#define SPI_push() ((void) 0)
+#define SPI_pop() ((void) 0)
+#define SPI_push_conditional() false
+#define SPI_pop_conditional(pushed) ((void) 0)
+#define SPI_restore_connection() ((void) 0)
+
extern PGDLLIMPORT uint64 SPI_processed;
extern PGDLLIMPORT Oid SPI_lastoid;
extern PGDLLIMPORT SPITupleTable *SPI_tuptable;
@@ -66,11 +73,6 @@ extern PGDLLIMPORT int SPI_result;
extern int SPI_connect(void);
extern int SPI_finish(void);
-extern void SPI_push(void);
-extern void SPI_pop(void);
-extern bool SPI_push_conditional(void);
-extern void SPI_pop_conditional(bool pushed);
-extern void SPI_restore_connection(void);
extern int SPI_execute(const char *src, bool read_only, long tcount);
extern int SPI_execute_plan(SPIPlanPtr plan, Datum *Values, const char *Nulls,
bool read_only, long tcount);
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 461986cda3..9a2d0527f8 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -3057,12 +3057,6 @@ plperl_spi_exec(char *query, int limit)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just
- * in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -3078,13 +3072,6 @@ plperl_spi_exec(char *query, int limit)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return
- * to connected state.
- */
- SPI_restore_connection();
-
/* Punt the error to Perl */
croak_cstr(edata->message);
@@ -3296,12 +3283,6 @@ plperl_spi_query(char *query)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just
- * in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -3317,13 +3298,6 @@ plperl_spi_query(char *query)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return
- * to connected state.
- */
- SPI_restore_connection();
-
/* Punt the error to Perl */
croak_cstr(edata->message);
@@ -3382,12 +3356,6 @@ plperl_spi_fetchrow(char *cursor)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just
- * in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -3403,13 +3371,6 @@ plperl_spi_fetchrow(char *cursor)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return
- * to connected state.
- */
- SPI_restore_connection();
-
/* Punt the error to Perl */
croak_cstr(edata->message);
@@ -3543,12 +3504,6 @@ plperl_spi_prepare(char *query, int argc, SV **argv)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just
- * in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -3574,13 +3529,6 @@ plperl_spi_prepare(char *query, int argc, SV **argv)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return
- * to connected state.
- */
- SPI_restore_connection();
-
/* Punt the error to Perl */
croak_cstr(edata->message);
@@ -3694,12 +3642,6 @@ plperl_spi_exec_prepared(char *query, HV *attr, int argc, SV **argv)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just
- * in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -3715,13 +3657,6 @@ plperl_spi_exec_prepared(char *query, HV *attr, int argc, SV **argv)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return
- * to connected state.
- */
- SPI_restore_connection();
-
/* Punt the error to Perl */
croak_cstr(edata->message);
@@ -3823,12 +3758,6 @@ plperl_spi_query_prepared(char *query, int argc, SV **argv)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just
- * in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -3844,13 +3773,6 @@ plperl_spi_query_prepared(char *query, int argc, SV **argv)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return
- * to connected state.
- */
- SPI_restore_connection();
-
/* Punt the error to Perl */
croak_cstr(edata->message);
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 91e1f8dd3f..77e7440002 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -1337,12 +1337,6 @@ exec_stmt_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block)
* automatically cleaned up during subxact exit.)
*/
estate->eval_econtext = old_eval_econtext;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but
- * just in case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
PG_CATCH();
{
@@ -1384,13 +1378,6 @@ exec_stmt_block(PLpgSQL_execstate *estate, PLpgSQL_stmt_block *block)
/* Revert to outer eval_econtext */
estate->eval_econtext = old_eval_econtext;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it
- * will have left us in a disconnected state. We need this hack
- * to return to connected state.
- */
- SPI_restore_connection();
-
/*
* Must clean up the econtext too. However, any tuple table made
* in the subxact will have been thrown away by SPI during subxact
@@ -5587,8 +5574,6 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate,
* Without this, stable functions within the expression would fail to see
* updates made so far by our own function.
*/
- SPI_push();
-
oldcontext = MemoryContextSwitchTo(get_eval_mcontext(estate));
if (!estate->readonly_func)
{
@@ -5636,8 +5621,6 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate,
MemoryContextSwitchTo(oldcontext);
- SPI_pop();
-
/*
* Now we can release our refcount on the cached plan.
*/
@@ -6281,8 +6264,6 @@ exec_cast_value(PLpgSQL_execstate *estate,
ExprContext *econtext = estate->eval_econtext;
MemoryContext oldcontext;
- SPI_push();
-
oldcontext = MemoryContextSwitchTo(get_eval_mcontext(estate));
econtext->caseValue_datum = value;
@@ -6296,8 +6277,6 @@ exec_cast_value(PLpgSQL_execstate *estate,
cast_entry->cast_in_use = false;
MemoryContextSwitchTo(oldcontext);
-
- SPI_pop();
}
}
diff --git a/src/pl/plpython/plpy_exec.c b/src/pl/plpython/plpy_exec.c
index fa5b25a5fa..697a0e1cc0 100644
--- a/src/pl/plpython/plpy_exec.c
+++ b/src/pl/plpython/plpy_exec.c
@@ -1103,8 +1103,6 @@ PLy_abort_open_subtransactions(int save_subxact_level)
RollbackAndReleaseCurrentSubTransaction();
- SPI_restore_connection();
-
subtransactiondata = (PLySubtransactionData *) linitial(explicit_subtransactions);
explicit_subtransactions = list_delete_first(explicit_subtransactions);
diff --git a/src/pl/plpython/plpy_spi.c b/src/pl/plpython/plpy_spi.c
index b082d017ea..07ab6a087e 100644
--- a/src/pl/plpython/plpy_spi.c
+++ b/src/pl/plpython/plpy_spi.c
@@ -516,12 +516,6 @@ PLy_spi_subtransaction_commit(MemoryContext oldcontext, ResourceOwner oldowner)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just in
- * case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
void
@@ -541,13 +535,6 @@ PLy_spi_subtransaction_abort(MemoryContext oldcontext, ResourceOwner oldowner)
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return to
- * connected state.
- */
- SPI_restore_connection();
-
/* Look up the correct exception */
entry = hash_search(PLy_spi_exceptions, &(edata->sqlerrcode),
HASH_FIND, NULL);
diff --git a/src/pl/plpython/plpy_subxactobject.c b/src/pl/plpython/plpy_subxactobject.c
index 81fb3a3a4a..9f1caa87d9 100644
--- a/src/pl/plpython/plpy_subxactobject.c
+++ b/src/pl/plpython/plpy_subxactobject.c
@@ -7,7 +7,6 @@
#include "postgres.h"
#include "access/xact.h"
-#include "executor/spi.h"
#include "utils/memutils.h"
#include "plpython.h"
@@ -213,12 +212,6 @@ PLy_subtransaction_exit(PyObject *self, PyObject *args)
CurrentResourceOwner = subxactdata->oldowner;
pfree(subxactdata);
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just in
- * case it did, make sure we remain connected.
- */
- SPI_restore_connection();
-
Py_INCREF(Py_None);
return Py_None;
}
diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c
index 20809102ef..b0d9e419bb 100644
--- a/src/pl/tcl/pltcl.c
+++ b/src/pl/tcl/pltcl.c
@@ -2182,11 +2182,9 @@ pltcl_returnnext(ClientData cdata, Tcl_Interp *interp,
{
HeapTuple tuple;
- SPI_push();
tuple = pltcl_build_tuple_result(interp, rowObjv, rowObjc,
call_state);
tuplestore_puttuple(call_state->tuple_store, tuple);
- SPI_pop();
}
}
else
@@ -2249,12 +2247,6 @@ pltcl_subtrans_commit(MemoryContext oldcontext, ResourceOwner oldowner)
ReleaseCurrentSubTransaction();
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
-
- /*
- * AtEOSubXact_SPI() should not have popped any SPI context, but just in
- * case it did, make sure we remain connected.
- */
- SPI_restore_connection();
}
static void
@@ -2273,13 +2265,6 @@ pltcl_subtrans_abort(Tcl_Interp *interp,
MemoryContextSwitchTo(oldcontext);
CurrentResourceOwner = oldowner;
- /*
- * If AtEOSubXact_SPI() popped any SPI context of the subxact, it will
- * have left us in a disconnected state. We need this hack to return to
- * connected state.
- */
- SPI_restore_connection();
-
/* Pass the error data to Tcl */
pltcl_construct_errorCode(interp, edata);
UTF_BEGIN;
@@ -3029,9 +3014,6 @@ pltcl_build_tuple_argument(HeapTuple tuple, TupleDesc tupdesc)
* mess, there's no way to prevent the datatype input functions it calls
* from leaking. Run it in a short-lived context, unless we're about to
* exit the procedure anyway.
- *
- * Also, caller is responsible for doing SPI_push/SPI_pop if calling from
- * inside SPI environment.
**********************************************************************/
static HeapTuple
pltcl_build_tuple_result(Tcl_Interp *interp, Tcl_Obj **kvObjv, int kvObjc,
--
cgit v1.2.3
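For extension authors, the practical upshot of the patch above is that SPI connections now nest as a plain stack: SPI_connect() can simply be called again from a function reached during SPI execution, and the old SPI_push()/SPI_pop() bookkeeping compiles away to no-ops. A minimal sketch of an SPI-using C function under the new rules (the function name and the foo table are illustrative, not from the patch):

    #include "postgres.h"
    #include "fmgr.h"
    #include "executor/spi.h"

    PG_MODULE_MAGIC;

    PG_FUNCTION_INFO_V1(count_foo_rows);

    /* Count the rows of table foo via SPI; callable from SQL. */
    Datum
    count_foo_rows(PG_FUNCTION_ARGS)
    {
        int64       result;

        /* No SPI_push() needed even if our caller is itself inside SPI */
        if (SPI_connect() != SPI_OK_CONNECT)
            elog(ERROR, "SPI_connect failed");

        if (SPI_execute("SELECT 1 FROM foo", true, 0) != SPI_OK_SELECT)
            elog(ERROR, "SPI_execute failed");

        result = (int64) SPI_processed;

        SPI_finish();

        PG_RETURN_INT64(result);
    }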
From 3887ba6dbb08f50c0ee6639a80e68ef697222457 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 9 Nov 2016 12:00:00 -0500
Subject: doc: Improve whitespace use in XSL
---
doc/src/sgml/stylesheet-common.xsl | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/stylesheet-common.xsl b/doc/src/sgml/stylesheet-common.xsl
index e3841130eb..c23d38f128 100644
--- a/doc/src/sgml/stylesheet-common.xsl
+++ b/doc/src/sgml/stylesheet-common.xsl
@@ -77,7 +77,9 @@
- ??
+ ?
+
+ ?
--
cgit v1.2.3
From 41124a91e61fc6d9681c1e8b15ba30494e84d643 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Wed, 9 Nov 2016 16:26:32 -0500
Subject: pgbench: Allow the transaction log file prefix to be changed.
Masahiko Sawada, reviewed by Fabien Coelho and Beena Emerson, with
a bit of wordsmithing and cosmetic adjustment by me.
---
doc/src/sgml/ref/pgbench.sgml | 26 +++++++++++++++++++-------
src/bin/pgbench/pgbench.c | 20 ++++++++++++++++++--
2 files changed, 37 insertions(+), 9 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 285608d508..3a65729bf3 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -614,6 +614,16 @@ pgbench options> dbname>
+
+
+ --log-prefix=prefix>
+
+
+ Set the filename prefix for the transaction log file created by
+
+ --log>. The default is pgbench_log>.
+
+
+
+
@@ -1121,15 +1131,17 @@ END;
With the -l> option but without the --aggregate-interval>,
pgbench> writes the time taken by each transaction
to a log file. The log file will be named
- pgbench_log.nnn>, where
- nnn> is the PID of the pgbench process.
- If the -j> option is 2 or higher, creating multiple worker
- threads, each will have its own log file. The first worker will use the
- same name for its log file as in the standard single worker case.
+ prefix>.nnn>,
+ where prefix> defaults to pgbench_log>, and
+ nnn> is the PID of the
+ pgbench process. If the -j> option is 2 or higher,
+ creating multiple worker threads, each will have its own log file. The first worker will
+ use the same name for its log file as in the standard single worker case.
The additional log files for the other workers will be named
- pgbench_log.nnn>.mmm>,
+ pgbench_log>.nnn>.mmm>,
where mmm> is a sequential number for each worker starting
- with 1.
+ with 1. The prefix can be changed by using the --log-prefix> option.
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index d44cfdab49..a7fdd8ac94 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -180,6 +180,7 @@ char *pghost = "";
char *pgport = "";
char *login = NULL;
char *dbName;
+char *logfile_prefix = NULL;
const char *progname;
#define WSEP '@' /* weight separator */
@@ -511,6 +512,8 @@ usage(void)
" --aggregate-interval=NUM aggregate data over NUM seconds\n"
" --progress-timestamp use Unix epoch timestamps for progress\n"
" --sampling-rate=NUM fraction of transactions to log (e.g., 0.01 for 1%%)\n"
+ " --log-prefix=PREFIX prefix for transaction time log file\n"
+ " (default: \"pgbench_log\")\n"
"\nCommon options:\n"
" -d, --debug print debugging output\n"
" -h, --host=HOSTNAME database server host or socket directory\n"
@@ -3643,6 +3646,7 @@ main(int argc, char **argv)
{"sampling-rate", required_argument, NULL, 4},
{"aggregate-interval", required_argument, NULL, 5},
{"progress-timestamp", no_argument, NULL, 6},
+ {"log-prefix", required_argument, NULL, 7},
{NULL, 0, NULL, 0}
};
@@ -3990,6 +3994,10 @@ main(int argc, char **argv)
progress_timestamp = true;
benchmarking_option_set = true;
break;
+ case 7:
+ benchmarking_option_set = true;
+ logfile_prefix = pg_strdup(optarg);
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
@@ -4087,6 +4095,12 @@ main(int argc, char **argv)
exit(1);
}
+ if (!use_log && logfile_prefix)
+ {
+ fprintf(stderr, "log file prefix (--log-prefix) is allowed only when logging transactions (-l)\n");
+ exit(1);
+ }
+
if (duration > 0 && agg_interval > duration)
{
fprintf(stderr, "number of seconds for aggregation (%d) must not be higher than test duration (%d)\n", agg_interval, duration);
@@ -4388,11 +4402,13 @@ threadRun(void *arg)
if (use_log)
{
char logpath[64];
+ char *prefix = logfile_prefix ? logfile_prefix : "pgbench_log";
if (thread->tid == 0)
- snprintf(logpath, sizeof(logpath), "pgbench_log.%d", main_pid);
+ snprintf(logpath, sizeof(logpath), "%s.%d", prefix, main_pid);
else
- snprintf(logpath, sizeof(logpath), "pgbench_log.%d.%d", main_pid, thread->tid);
+ snprintf(logpath, sizeof(logpath), "%s.%d.%d", prefix, main_pid, thread->tid);
+
thread->logfile = fopen(logpath, "w");
if (thread->logfile == NULL)
--
cgit v1.2.3
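In practice a run such as

    pgbench -j 2 -l --log-prefix=mytest bench

would produce per-transaction logs named mytest.NNN for the first worker and mytest.NNN.1 for the second, where NNN is the PID of the pgbench process; the prefix mytest and the database name bench are illustrative only.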
From 0b1b5033ac19236a4a23eed9232d74936717020e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 10 Nov 2016 10:37:14 -0500
Subject: Doc: improve link.
Discussion: <5019.1478790246@sss.pgh.pa.us>
---
doc/src/sgml/storage.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index fddb69bad3..5c52824dfc 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -474,7 +474,7 @@ for storing TOAST>-able columns on disk:
Each TOAST>-able data type specifies a default strategy for columns
of that data type, but the strategy for a given table column can be altered
-with ALTER TABLE SET STORAGE>.
+with ALTER TABLE ... SET STORAGE>.
--
cgit v1.2.3
From 279c439c7fbc1bcb52173d92dd3b1fbe63e497ab Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 10 Nov 2016 14:13:43 -0500
Subject: Support "COPY view FROM" for views with INSTEAD OF INSERT triggers.
We just pass the data to the INSTEAD trigger.
Haribabu Kommi, reviewed by Dilip Kumar
Patch:
---
doc/src/sgml/ref/copy.sgml | 10 +++-
src/backend/commands/copy.c | 104 +++++++++++++++++++++---------------
src/test/regress/expected/copy2.out | 26 +++++++++
src/test/regress/sql/copy2.sql | 29 ++++++++++
4 files changed, 125 insertions(+), 44 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 07e2f45196..2477a872e8 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -395,9 +395,15 @@ COPY countNotes
- COPY can only be used with plain tables, not
+ COPY TO can only be used with plain tables, not
with views. However, you can write COPY (SELECT * FROM
- viewname) TO ....
+ viewname) TO ...
+ to copy the current contents of a view.
+
+
+
+ COPY FROM can be used with plain tables and with views
+ that have INSTEAD OF INSERT> triggers.
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index b4140eb68a..3c81906232 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -864,8 +864,8 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, uint64 *processed)
* statement.
*
* In the case that columns are specified in the attribute list,
- * create a ColumnRef and ResTarget for each column and add them to
- * the target list for the resulting SELECT statement.
+ * create a ColumnRef and ResTarget for each column and add them
+ * to the target list for the resulting SELECT statement.
*/
if (!stmt->attlist)
{
@@ -2269,13 +2269,21 @@ CopyFrom(CopyState cstate)
Assert(cstate->rel);
- if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
+ /*
+ * The target must be a plain relation or have an INSTEAD OF INSERT row
+ * trigger. (Currently, such triggers are only allowed on views, so we
+ * only hint about them in the view case.)
+ */
+ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ !(cstate->rel->trigdesc &&
+ cstate->rel->trigdesc->trig_insert_instead_row))
{
if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to view \"%s\"",
- RelationGetRelationName(cstate->rel))));
+ RelationGetRelationName(cstate->rel)),
+ errhint("To enable copying to a view, provide an INSTEAD OF INSERT trigger.")));
else if (cstate->rel->rd_rel->relkind == RELKIND_MATVIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -2496,52 +2504,64 @@ CopyFrom(CopyState cstate)
if (!skip_tuple)
{
- /* Check the constraints of the tuple */
- if (cstate->rel->rd_att->constr)
- ExecConstraints(resultRelInfo, slot, estate);
-
- if (useHeapMultiInsert)
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
{
- /* Add this tuple to the tuple buffer */
- if (nBufferedTuples == 0)
- firstBufferedLineNo = cstate->cur_lineno;
- bufferedTuples[nBufferedTuples++] = tuple;
- bufferedTuplesSize += tuple->t_len;
-
- /*
- * If the buffer filled up, flush it. Also flush if the total
- * size of all the tuples in the buffer becomes large, to
- * avoid using large amounts of memory for the buffers when
- * the tuples are exceptionally wide.
- */
- if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
- bufferedTuplesSize > 65535)
- {
- CopyFromInsertBatch(cstate, estate, mycid, hi_options,
- resultRelInfo, myslot, bistate,
- nBufferedTuples, bufferedTuples,
- firstBufferedLineNo);
- nBufferedTuples = 0;
- bufferedTuplesSize = 0;
- }
+ /* Pass the data to the INSTEAD ROW INSERT trigger */
+ ExecIRInsertTriggers(estate, resultRelInfo, slot);
}
else
{
- List *recheckIndexes = NIL;
+ /* Check the constraints of the tuple */
+ if (cstate->rel->rd_att->constr)
+ ExecConstraints(resultRelInfo, slot, estate);
+
+ if (useHeapMultiInsert)
+ {
+ /* Add this tuple to the tuple buffer */
+ if (nBufferedTuples == 0)
+ firstBufferedLineNo = cstate->cur_lineno;
+ bufferedTuples[nBufferedTuples++] = tuple;
+ bufferedTuplesSize += tuple->t_len;
+
+ /*
+ * If the buffer filled up, flush it. Also flush if the
+ * total size of all the tuples in the buffer becomes
+ * large, to avoid using large amounts of memory for the
+ * buffer when the tuples are exceptionally wide.
+ */
+ if (nBufferedTuples == MAX_BUFFERED_TUPLES ||
+ bufferedTuplesSize > 65535)
+ {
+ CopyFromInsertBatch(cstate, estate, mycid, hi_options,
+ resultRelInfo, myslot, bistate,
+ nBufferedTuples, bufferedTuples,
+ firstBufferedLineNo);
+ nBufferedTuples = 0;
+ bufferedTuplesSize = 0;
+ }
+ }
+ else
+ {
+ List *recheckIndexes = NIL;
- /* OK, store the tuple and create index entries for it */
- heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
+ /* OK, store the tuple and create index entries for it */
+ heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
- if (resultRelInfo->ri_NumIndices > 0)
- recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate, false, NULL,
- NIL);
+ if (resultRelInfo->ri_NumIndices > 0)
+ recheckIndexes = ExecInsertIndexTuples(slot,
+ &(tuple->t_self),
+ estate,
+ false,
+ NULL,
+ NIL);
- /* AFTER ROW INSERT Triggers */
- ExecARInsertTriggers(estate, resultRelInfo, tuple,
- recheckIndexes);
+ /* AFTER ROW INSERT Triggers */
+ ExecARInsertTriggers(estate, resultRelInfo, tuple,
+ recheckIndexes);
- list_free(recheckIndexes);
+ list_free(recheckIndexes);
+ }
}
/*
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 9a8922df2d..65e9c626b3 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -535,6 +535,29 @@ COPY rls_t1 (a, b) TO stdout;
2 3
4 1
RESET SESSION AUTHORIZATION;
+-- test with INSTEAD OF INSERT trigger on a view
+CREATE TABLE instead_of_insert_tbl(id serial, name text);
+CREATE VIEW instead_of_insert_tbl_view AS SELECT ''::text AS str;
+COPY instead_of_insert_tbl_view FROM stdin; -- fail
+ERROR: cannot copy to view "instead_of_insert_tbl_view"
+HINT: To enable copying to a view, provide an INSTEAD OF INSERT trigger.
+CREATE FUNCTION fun_instead_of_insert_tbl() RETURNS trigger AS $$
+BEGIN
+ INSERT INTO instead_of_insert_tbl (name) VALUES (NEW.str);
+ RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
+CREATE TRIGGER trig_instead_of_insert_tbl_view
+ INSTEAD OF INSERT ON instead_of_insert_tbl_view
+ FOR EACH ROW EXECUTE PROCEDURE fun_instead_of_insert_tbl();
+COPY instead_of_insert_tbl_view FROM stdin;
+SELECT * FROM instead_of_insert_tbl;
+ id | name
+----+-------
+ 1 | test1
+(1 row)
+
+-- clean up
DROP TABLE forcetest;
DROP TABLE vistest;
DROP FUNCTION truncate_in_subxact();
@@ -544,3 +567,6 @@ DROP ROLE regress_rls_copy_user;
DROP ROLE regress_rls_copy_user_colperms;
DROP FUNCTION fn_x_before();
DROP FUNCTION fn_x_after();
+DROP TABLE instead_of_insert_tbl;
+DROP VIEW instead_of_insert_tbl_view;
+DROP FUNCTION fun_instead_of_insert_tbl();
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 89d0a39eb9..f3a6d228fa 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -387,6 +387,32 @@ COPY rls_t1 (a, b) TO stdout;
RESET SESSION AUTHORIZATION;
+-- test with INSTEAD OF INSERT trigger on a view
+CREATE TABLE instead_of_insert_tbl(id serial, name text);
+CREATE VIEW instead_of_insert_tbl_view AS SELECT ''::text AS str;
+
+COPY instead_of_insert_tbl_view FROM stdin; -- fail
+test1
+\.
+
+CREATE FUNCTION fun_instead_of_insert_tbl() RETURNS trigger AS $$
+BEGIN
+ INSERT INTO instead_of_insert_tbl (name) VALUES (NEW.str);
+ RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
+CREATE TRIGGER trig_instead_of_insert_tbl_view
+ INSTEAD OF INSERT ON instead_of_insert_tbl_view
+ FOR EACH ROW EXECUTE PROCEDURE fun_instead_of_insert_tbl();
+
+COPY instead_of_insert_tbl_view FROM stdin;
+test1
+\.
+
+SELECT * FROM instead_of_insert_tbl;
+
+
+-- clean up
DROP TABLE forcetest;
DROP TABLE vistest;
DROP FUNCTION truncate_in_subxact();
@@ -396,3 +422,6 @@ DROP ROLE regress_rls_copy_user;
DROP ROLE regress_rls_copy_user_colperms;
DROP FUNCTION fn_x_before();
DROP FUNCTION fn_x_after();
+DROP TABLE instead_of_insert_tbl;
+DROP VIEW instead_of_insert_tbl_view;
+DROP FUNCTION fun_instead_of_insert_tbl();
--
cgit v1.2.3
From 1c1475577623019ff887825b9a804f78ec806344 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 11 Nov 2016 12:03:49 -0500
Subject: Doc: fix data types of FuncCallContext's call_cntr and max_calls
fields.
Commit 23a27b039 widened these from uint32 to uint64, but I overlooked
that the documentation explicitly showed them as uint32. Per report
from Vicky Vergara.
Report: <20161111135422.8761.36733@wrigleys.postgresql.org>
---
doc/src/sgml/xfunc.sgml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index de6a466efc..783702a637 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -2870,7 +2870,7 @@ HeapTupleGetDatum(HeapTuple tuple)
is used to hold a pointer to FuncCallContext>
across calls.
-typedef struct
+typedef struct FuncCallContext
{
/*
* Number of times we've been called before
@@ -2878,7 +2878,7 @@ typedef struct
* call_cntr is initialized to 0 for you by SRF_FIRSTCALL_INIT(), and
* incremented for you every time SRF_RETURN_NEXT() is called.
*/
- uint32 call_cntr;
+ uint64 call_cntr;
/*
* OPTIONAL maximum number of calls
@@ -2887,7 +2887,7 @@ typedef struct
* If not set, you must provide alternative means to know when the
* function is done.
*/
- uint32 max_calls;
+ uint64 max_calls;
/*
* OPTIONAL pointer to result slot
--
cgit v1.2.3
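With the corrected field widths, a set-returning function written against this struct compares two uint64 counters. A minimal sketch (the countup function and its assumed SQL signature, taking and returning bigint as a set, are illustrative):

    #include "postgres.h"
    #include "fmgr.h"
    #include "funcapi.h"

    PG_MODULE_MAGIC;

    PG_FUNCTION_INFO_V1(countup);

    /* Return the integers 0 .. n-1, one per call. */
    Datum
    countup(PG_FUNCTION_ARGS)
    {
        FuncCallContext *funcctx;

        if (SRF_IS_FIRSTCALL())
        {
            funcctx = SRF_FIRSTCALL_INIT();
            /* max_calls is uint64 now, per the doc fix above */
            funcctx->max_calls = (uint64) PG_GETARG_INT64(0);
        }

        funcctx = SRF_PERCALL_SETUP();

        if (funcctx->call_cntr < funcctx->max_calls)
            SRF_RETURN_NEXT(funcctx, Int64GetDatum((int64) funcctx->call_cntr));
        else
            SRF_RETURN_DONE(funcctx);
    }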
From d5d8a0b7e54ca09d0b5fdfc6afcb307450f33215 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 13 Nov 2016 13:12:35 -0500
Subject: Doc: remove obsolete example.
The documentation for ts_headline() recommends using a sub-select to
avoid extra evaluations of ts_headline() in a query with ORDER BY+LIMIT.
Since commit 9118d03a8 this contortionism is unnecessary, so remove the
recommendation. Noted by Oleg Bartunov.
Discussion:
---
doc/src/sgml/textsearch.sgml | 14 +-------------
1 file changed, 1 insertion(+), 13 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 5a70d7db80..2da75955d0 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1290,19 +1290,7 @@ query.',
ts_headline> uses the original document, not a
tsvector summary, so it can be slow and should be used with
- care. A typical mistake is to call ts_headline for
- every matching document when only ten documents are
- to be shown. SQL subqueries can help; here is an
- example:
-
-
-SELECT id, ts_headline(body, q), rank
-FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
- FROM apod, to_tsquery('stars') q
- WHERE ti @@ q
- ORDER BY rank DESC
- LIMIT 10) AS foo;
-
+ care.
--
cgit v1.2.3
From 8ce4f597abc530b3b59bcf3a3964f31e50054bcd Mon Sep 17 00:00:00 2001
From: Alvaro Herrera
Date: Mon, 14 Nov 2016 11:14:34 -0300
Subject: Fix duplication in ALTER MATERIALIZED VIEW synopsis
Commit 3c4cf080879b should have removed SET TABLESPACE from the synopsis
of ALTER MATERIALIZED VIEW as a possible "action" when it added a
separate line for it in the main command listing, but failed to.
Repair.
Backpatch to 9.4, like the aforementioned commit.
---
doc/src/sgml/ref/alter_materialized_view.sgml | 1 -
1 file changed, 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_materialized_view.sgml b/doc/src/sgml/ref/alter_materialized_view.sgml
index b5c44bfabf..b88f5ac00f 100644
--- a/doc/src/sgml/ref/alter_materialized_view.sgml
+++ b/doc/src/sgml/ref/alter_materialized_view.sgml
@@ -45,7 +45,6 @@ ALTER MATERIALIZED VIEW ALL IN TABLESPACE namestorage_parameter = value [, ... ] )
RESET ( storage_parameter [, ... ] )
OWNER TO { new_owner | CURRENT_USER | SESSION_USER }
- SET TABLESPACE new_tablespace
--
cgit v1.2.3
From ffa8c3d8521bbd7c6da497264f1a12572a1ff713 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Mon, 14 Nov 2016 14:53:07 -0800
Subject: Provide NO_INSTALLCHECK option for pgxs.
This allows us to avoid running the regression tests in contrib modules
like pg_stat_statements in a less ugly manner.
Discussion: <22432.1478968242@sss.pgh.pa.us>
---
contrib/pg_stat_statements/Makefile | 7 +++----
doc/src/sgml/extend.sgml | 9 +++++++++
src/makefiles/pgxs.mk | 4 ++++
3 files changed, 16 insertions(+), 4 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index f1a45eb17a..298951a5f5 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -13,6 +13,9 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
REGRESS = pg_stat_statements
+# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
+# which typical installcheck users do not have (e.g. buildfarm clients).
+NO_INSTALLCHECK = 1
ifdef USE_PGXS
PG_CONFIG = pg_config
@@ -24,7 +27,3 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
-
-# Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
-# which typical installcheck users do not have (e.g. buildfarm clients).
-installcheck: REGRESS=
diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml
index e19c657d8f..f9d91a3923 100644
--- a/doc/src/sgml/extend.sgml
+++ b/doc/src/sgml/extend.sgml
@@ -1193,6 +1193,15 @@ include $(PGXS)
+
+ NO_INSTALLCHECK
+
+
+ don't define an installcheck target, useful e.g. if tests require special configuration, or don't use pg_regress
+
+
+
+
EXTRA_CLEAN
diff --git a/src/makefiles/pgxs.mk b/src/makefiles/pgxs.mk
index 2b4d684e8e..c27004ecfb 100644
--- a/src/makefiles/pgxs.mk
+++ b/src/makefiles/pgxs.mk
@@ -40,6 +40,8 @@
# which need to be built first
# REGRESS -- list of regression test cases (without suffix)
# REGRESS_OPTS -- additional switches to pass to pg_regress
+# NO_INSTALLCHECK -- don't define an installcheck target, useful e.g. if
+# tests require special configuration, or don't use pg_regress
# EXTRA_CLEAN -- extra files to remove in 'make clean'
# PG_CPPFLAGS -- will be added to CPPFLAGS
# PG_LIBS -- will be added to PROGRAM link line
@@ -268,8 +270,10 @@ ifndef PGXS
endif
# against installed postmaster
+ifndef NO_INSTALLCHECK
installcheck: submake $(REGRESS_PREP)
$(pg_regress_installcheck) $(REGRESS_OPTS) $(REGRESS)
+endif
ifdef PGXS
check:
--
cgit v1.2.3
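Combining the pieces above, an out-of-tree module whose tests need special server configuration would set the new variable in its PGXS Makefile roughly like this (the module name my_module is illustrative):

    MODULES = my_module
    REGRESS = my_module
    # The tests require shared_preload_libraries=my_module, which a
    # typical installcheck target server will not have configured.
    NO_INSTALLCHECK = 1

    PG_CONFIG = pg_config
    PGXS := $(shell $(PG_CONFIG) --pgxs)
    include $(PGXS)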
From e36ddab11735052841b4eff96642187ec9a8a7bc Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 8 Nov 2016 12:00:00 -0500
Subject: Build HTML documentation using XSLT stylesheets by default
The old DSSSL build is still available for a while using the make target
"oldhtml".
---
doc/src/sgml/Makefile | 8 ++++----
doc/src/sgml/stylesheet.css | 50 +++++++++++++++++----------------------------
2 files changed, 23 insertions(+), 35 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/Makefile b/doc/src/sgml/Makefile
index 84c94e8ae0..fe7ca65cd4 100644
--- a/doc/src/sgml/Makefile
+++ b/doc/src/sgml/Makefile
@@ -106,9 +106,9 @@ draft: postgres.sgml $(ALMOSTALLSGML) stylesheet.dsl
$(JADE.html.call) -V draft-mode $<
cp $(srcdir)/stylesheet.css html/
-html: html-stamp
+oldhtml: oldhtml-stamp
-html-stamp: postgres.sgml $(ALLSGML) stylesheet.dsl
+oldhtml-stamp: postgres.sgml $(ALLSGML) stylesheet.dsl
$(MAKE) check-tabs
$(MKDIR_P) html
$(JADE.html.call) -i include-index $<
@@ -258,9 +258,9 @@ ifeq ($(STYLE),website)
XSLTPROC_HTML_FLAGS += --param website.stylesheet 1
endif
-xslthtml: xslthtml-stamp
+html: html-stamp
-xslthtml-stamp: stylesheet.xsl postgres.xml
+html-stamp: stylesheet.xsl postgres.xml
$(XMLLINT) --noout --valid postgres.xml
$(XSLTPROC) $(XSLTPROCFLAGS) $(XSLTPROC_HTML_FLAGS) $^
cp $(srcdir)/stylesheet.css html/
diff --git a/doc/src/sgml/stylesheet.css b/doc/src/sgml/stylesheet.css
index 60dcc76209..f845876d07 100644
--- a/doc/src/sgml/stylesheet.css
+++ b/doc/src/sgml/stylesheet.css
@@ -2,18 +2,18 @@
/* color scheme similar to www.postgresql.org */
-BODY {
+body {
color: #000000;
background: #FFFFFF;
font-family: verdana, sans-serif;
}
-A:link { color:#0066A2; }
-A:visited { color:#004E66; }
-A:active { color:#0066A2; }
-A:hover { color:#000000; }
+a:link { color:#0066A2; }
+a:visited { color:#004E66; }
+a:active { color:#0066A2; }
+a:hover { color:#000000; }
-H1 {
+h1 {
font-size: 1.4em;
font-weight: bold;
margin-top: 0em;
@@ -21,34 +21,34 @@ H1 {
color: #EC5800;
}
-H2 {
+h2 {
font-size: 1.2em;
margin: 1.2em 0em 1.2em 0em;
font-weight: bold;
- color: #666;
+ color: #EC5800;
}
-H3 {
+h3 {
font-size: 1.1em;
margin: 1.2em 0em 1.2em 0em;
font-weight: bold;
color: #666;
}
-H4 {
+h4 {
font-size: 0.95em;
margin: 1.2em 0em 1.2em 0em;
font-weight: normal;
color: #666;
}
-H5 {
+h5 {
font-size: 0.9em;
margin: 1.2em 0em 1.2em 0em;
font-weight: normal;
}
-H6 {
+h6 {
font-size: 0.85em;
margin: 1.2em 0em 1.2em 0em;
font-weight: normal;
@@ -56,13 +56,13 @@ H6 {
/* center some titles */
-.BOOK .TITLE, .BOOK .CORPAUTHOR, .BOOK .COPYRIGHT {
+.book .title, .book .corpauthor, .book .copyright {
text-align: center;
}
/* decoration for formal examples */
-DIV.EXAMPLE {
+div.example {
padding-left: 15px;
border-style: solid;
border-width: 0px;
@@ -71,28 +71,16 @@ DIV.EXAMPLE {
margin: 0.5ex;
}
-/* less dense spacing of TOC */
-
-.BOOK .TOC DL DT {
- padding-top: 1.5ex;
- padding-bottom: 1.5ex;
-}
-
-.BOOK .TOC DL DL DT {
- padding-top: 0ex;
- padding-bottom: 0ex;
-}
-
/* miscellaneous */
-PRE.LITERALLAYOUT, .SCREEN, .SYNOPSIS, .PROGRAMLISTING {
+pre.literallayout, .screen, .synopsis, .programlisting {
margin-left: 4ex;
}
-.COMMENT { color: red; }
+.comment { color: red; }
-VAR { font-family: monospace; font-style: italic; }
+var { font-family: monospace; font-style: italic; }
/* Konqueror's standard style for ACRONYM is italic. */
-ACRONYM { font-style: inherit; }
+acronym { font-style: inherit; }
-.OPTION { white-space: nowrap; }
+.option { white-space: nowrap; }
--
cgit v1.2.3
From 380895f2deb18ed9e7a8be69961af2ed221ba9d3 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 16 Nov 2016 12:00:00 -0800
Subject: doc: Further XSLT HTML build performance optimization
Cut out some expensive stuff from the HTML head element that we don't
really need.
This was previously discussed as part of
e8306745e3504c642f7abad411139d5630e29fac, but ended up separate because
it changes the output contents slightly.
---
doc/src/sgml/stylesheet-speedup-xhtml.xsl | 41 +++++++++++++++++++++++++++++++
1 file changed, 41 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/stylesheet-speedup-xhtml.xsl b/doc/src/sgml/stylesheet-speedup-xhtml.xsl
index 53c811cab0..ff08bef808 100644
--- a/doc/src/sgml/stylesheet-speedup-xhtml.xsl
+++ b/doc/src/sgml/stylesheet-speedup-xhtml.xsl
@@ -251,4 +251,45 @@
+    (41 lines of added XSLT, markup lost in extraction: templates generating a slimmed-down HTML head element)
--
cgit v1.2.3
From 67dc4ccbb2e1c27da823eced66d9217a5652cbb0 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 18 Nov 2016 12:00:00 -0500
Subject: Add pg_sequences view
Like pg_tables, pg_views, and others, this view contains information
about sequences in a way that is independent of the system catalog
layout but more comprehensive than the information schema.
To help implement the view, add a new internal function
pg_sequence_last_value() to return the last value of a sequence. This
is kept separate from pg_sequence_parameters() to separate querying
run-time state from catalog-like information.
Reviewed-by: Andreas Karlsson
---
doc/src/sgml/catalogs.sgml | 97 ++++++++++++++++++++++++++++++++
src/backend/catalog/system_views.sql | 17 ++++++
src/backend/commands/sequence.c | 49 +++++++++++++++-
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_proc.h | 4 +-
src/include/commands/sequence.h | 1 +
src/test/regress/expected/rules.out | 14 +++++
src/test/regress/expected/sequence.out | 23 +++++++-
src/test/regress/expected/sequence_1.out | 23 +++++++-
src/test/regress/sql/sequence.sql | 13 ++++-
10 files changed, 233 insertions(+), 10 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index bac169a19e..561e228558 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -7394,6 +7394,11 @@
security labels
+
+ pg_sequences
+ sequences
+
+
pg_settingsparameter settings
@@ -9135,6 +9140,98 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
+
+ pg_sequences
+
+
+ pg_sequences
+
+
+
+ The view pg_sequences provides access to
+ useful information about each sequence in the database.
+
+
+
+ pg_sequences> Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+ schemaname
+ name
+ pg_namespace.nspname
+ Name of schema containing sequence
+
+
+ sequencename
+ name
+ pg_class.relname
+ Name of sequence
+
+
+ sequenceowner
+ name
+ pg_authid.rolname
+ Name of sequence's owner
+
+
+ start_value
+ bigint
+
+ Start value of the sequence
+
+
+ min_value
+ bigint
+
+ Minimum value of the sequence
+
+
+ max_value
+ bigint
+
+ Maximum value of the sequence
+
+
+ increment_by
+ bigint
+
+ Increment value of the sequence
+
+
+ cycle
+ boolean
+
+ Whether the sequence cycles
+
+
+ cache_size
+ bigint
+
+ Cache size of the sequence
+
+
+ last_value
+ bigint
+
+ The last sequence value written to disk. If caching is used,
+ this value can be greater than the last value handed out from the
+ sequence. Null if the sequence has not been read from yet.
+
+
+
+
+
+
pg_settings
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ada214274f..e011af122c 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -158,6 +158,23 @@ CREATE VIEW pg_indexes AS
LEFT JOIN pg_tablespace T ON (T.oid = I.reltablespace)
WHERE C.relkind IN ('r', 'm') AND I.relkind = 'i';
+CREATE OR REPLACE VIEW pg_sequences AS
+ SELECT
+ N.nspname AS schemaname,
+ C.relname AS sequencename,
+ pg_get_userbyid(C.relowner) AS sequenceowner,
+ p.start_value AS start_value,
+ p.minimum_value AS min_value,
+ p.maximum_value AS max_value,
+ p.increment AS increment_by,
+ p.cycle_option AS cycle,
+ p.cache_size AS cache_size,
+ pg_sequence_last_value(C.oid) AS last_value
+ FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace),
+ LATERAL pg_sequence_parameters(C.oid) p
+ WHERE NOT pg_is_other_temp_schema(N.oid)
+ AND relkind = 'S';
+
CREATE VIEW pg_stats WITH (security_barrier) AS
SELECT
nspname AS schemaname,
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index fc3a8eebce..7e37108b8d 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -1534,8 +1534,8 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
{
Oid relid = PG_GETARG_OID(0);
TupleDesc tupdesc;
- Datum values[5];
- bool isnull[5];
+ Datum values[6];
+ bool isnull[6];
SeqTable elm;
Relation seqrel;
Buffer buf;
@@ -1551,7 +1551,7 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
errmsg("permission denied for sequence %s",
RelationGetRelationName(seqrel))));
- tupdesc = CreateTemplateTupleDesc(5, false);
+ tupdesc = CreateTemplateTupleDesc(6, false);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "start_value",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "minimum_value",
@@ -1562,6 +1562,8 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "cycle_option",
BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "cache_size",
+ INT8OID, -1, 0);
BlessTupleDesc(tupdesc);
@@ -1574,6 +1576,7 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
values[2] = Int64GetDatum(seq->max_value);
values[3] = Int64GetDatum(seq->increment_by);
values[4] = BoolGetDatum(seq->is_cycled);
+ values[5] = Int64GetDatum(seq->cache_value);
UnlockReleaseBuffer(buf);
relation_close(seqrel, NoLock);
@@ -1581,6 +1584,46 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, isnull));
}
+/*
+ * Return the last value from the sequence
+ *
+ * Note: This has a completely different meaning than lastval().
+ */
+Datum
+pg_sequence_last_value(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ SeqTable elm;
+ Relation seqrel;
+ Buffer buf;
+ HeapTupleData seqtuple;
+ Form_pg_sequence seq;
+ bool is_called;
+ int64 result;
+
+ /* open and AccessShareLock sequence */
+ init_sequence(relid, &elm, &seqrel);
+
+ if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_USAGE) != ACLCHECK_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied for sequence %s",
+ RelationGetRelationName(seqrel))));
+
+ seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
+
+ is_called = seq->is_called;
+ result = seq->last_value;
+
+ UnlockReleaseBuffer(buf);
+ relation_close(seqrel, NoLock);
+
+ if (is_called)
+ PG_RETURN_INT64(result);
+ else
+ PG_RETURN_NULL();
+}
+
void
seq_redo(XLogReaderState *record)
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 880559650a..ef0386ceda 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201611041
+#define CATALOG_VERSION_NO 201611181
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 17ec71d47e..047a1ce71c 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1763,8 +1763,10 @@ DATA(insert OID = 1576 ( setval PGNSP PGUID 12 1 0 0 0 f f f f t f v u 2 0 20
DESCR("set sequence value");
DATA(insert OID = 1765 ( setval PGNSP PGUID 12 1 0 0 0 f f f f t f v u 3 0 20 "2205 20 16" _null_ _null_ _null_ _null_ _null_ setval3_oid _null_ _null_ _null_ ));
DESCR("set sequence value and is_called status");
-DATA(insert OID = 3078 ( pg_sequence_parameters PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2249 "26" "{26,20,20,20,20,16}" "{i,o,o,o,o,o}" "{sequence_oid,start_value,minimum_value,maximum_value,increment,cycle_option}" _null_ _null_ pg_sequence_parameters _null_ _null_ _null_));
+DATA(insert OID = 3078 ( pg_sequence_parameters PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2249 "26" "{26,20,20,20,20,16,20}" "{i,o,o,o,o,o,o}" "{sequence_oid,start_value,minimum_value,maximum_value,increment,cycle_option,cache_size}" _null_ _null_ pg_sequence_parameters _null_ _null_ _null_));
DESCR("sequence parameters, for use by information schema");
+DATA(insert OID = 4032 ( pg_sequence_last_value PGNSP PGUID 12 1 0 0 0 f f f f t f v u 1 0 20 "2205" _null_ _null_ _null_ _null_ _null_ pg_sequence_last_value _null_ _null_ _null_ ));
+DESCR("sequence last value");
DATA(insert OID = 1579 ( varbit_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 1562 "2275 26 23" _null_ _null_ _null_ _null_ _null_ varbit_in _null_ _null_ _null_ ));
DESCR("I/O");
diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h
index 392a626508..6695bbe4c7 100644
--- a/src/include/commands/sequence.h
+++ b/src/include/commands/sequence.h
@@ -73,6 +73,7 @@ extern Datum setval3_oid(PG_FUNCTION_ARGS);
extern Datum lastval(PG_FUNCTION_ARGS);
extern Datum pg_sequence_parameters(PG_FUNCTION_ARGS);
+extern Datum pg_sequence_last_value(PG_FUNCTION_ARGS);
extern ObjectAddress DefineSequence(ParseState *pstate, CreateSeqStmt *stmt);
extern ObjectAddress AlterSequence(ParseState *pstate, AlterSeqStmt *stmt);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 5e2962c681..031e8c2ef5 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1615,6 +1615,20 @@ UNION ALL
l.label
FROM (pg_shseclabel l
JOIN pg_authid rol ON (((l.classoid = rol.tableoid) AND (l.objoid = rol.oid))));
+pg_sequences| SELECT n.nspname AS schemaname,
+ c.relname AS sequencename,
+ pg_get_userbyid(c.relowner) AS sequenceowner,
+ p.start_value,
+ p.minimum_value AS min_value,
+ p.maximum_value AS max_value,
+ p.increment AS increment_by,
+ p.cycle_option AS cycle,
+ p.cache_size,
+ pg_sequence_last_value((c.oid)::regclass) AS last_value
+ FROM (pg_class c
+ LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))),
+ LATERAL pg_sequence_parameters(c.oid) p(start_value, minimum_value, maximum_value, increment, cycle_option, cache_size)
+ WHERE ((NOT pg_is_other_temp_schema(n.oid)) AND (c.relkind = 'S'::"char"));
pg_settings| SELECT a.name,
a.setting,
a.unit,
diff --git a/src/test/regress/expected/sequence.out b/src/test/regress/expected/sequence.out
index 4ffbe92ab3..c5413e09f3 100644
--- a/src/test/regress/expected/sequence.out
+++ b/src/test/regress/expected/sequence.out
@@ -300,20 +300,39 @@ SELECT nextval('sequence_test2');
5
(1 row)
+CREATE SEQUENCE sequence_test3; -- not read from, to test is_called
-- Information schema
SELECT * FROM information_schema.sequences WHERE sequence_name IN
- ('sequence_test2', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
+ ('sequence_test2', 'sequence_test3', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq')
ORDER BY sequence_name ASC;
sequence_catalog | sequence_schema | sequence_name | data_type | numeric_precision | numeric_precision_radix | numeric_scale | start_value | minimum_value | maximum_value | increment | cycle_option
------------------+-----------------+--------------------+-----------+-------------------+-------------------------+---------------+-------------+---------------+---------------------+-----------+--------------
regression | public | sequence_test2 | bigint | 64 | 2 | 0 | 32 | 5 | 36 | 4 | YES
+ regression | public | sequence_test3 | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f2_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f3_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f4_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f5_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f6_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
-(6 rows)
+(7 rows)
+
+SELECT schemaname, sequencename, start_value, min_value, max_value, increment_by, cycle, cache_size, last_value
+FROM pg_sequences
+WHERE sequencename IN
+ ('sequence_test2', 'sequence_test3', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
+ 'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq')
+ ORDER BY sequencename ASC;
+ schemaname | sequencename | start_value | min_value | max_value | increment_by | cycle | cache_size | last_value
+------------+--------------------+-------------+-----------+---------------------+--------------+-------+------------+------------
+ public | sequence_test2 | 32 | 5 | 36 | 4 | t | 1 | 5
+ public | sequence_test3 | 1 | 1 | 9223372036854775807 | 1 | f | 1 |
+ public | serialtest2_f2_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f3_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f4_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f5_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f6_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+(7 rows)
-- Test comments
COMMENT ON SEQUENCE asdf IS 'won''t work';
diff --git a/src/test/regress/expected/sequence_1.out b/src/test/regress/expected/sequence_1.out
index 05da2bf1ad..8164244927 100644
--- a/src/test/regress/expected/sequence_1.out
+++ b/src/test/regress/expected/sequence_1.out
@@ -300,20 +300,39 @@ SELECT nextval('sequence_test2');
5
(1 row)
+CREATE SEQUENCE sequence_test3; -- not read from, to test is_called
-- Information schema
SELECT * FROM information_schema.sequences WHERE sequence_name IN
- ('sequence_test2', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
+ ('sequence_test2', 'sequence_test3', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq')
ORDER BY sequence_name ASC;
sequence_catalog | sequence_schema | sequence_name | data_type | numeric_precision | numeric_precision_radix | numeric_scale | start_value | minimum_value | maximum_value | increment | cycle_option
------------------+-----------------+--------------------+-----------+-------------------+-------------------------+---------------+-------------+---------------+---------------------+-----------+--------------
regression | public | sequence_test2 | bigint | 64 | 2 | 0 | 32 | 5 | 36 | 4 | YES
+ regression | public | sequence_test3 | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f2_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f3_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f4_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f5_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
regression | public | serialtest2_f6_seq | bigint | 64 | 2 | 0 | 1 | 1 | 9223372036854775807 | 1 | NO
-(6 rows)
+(7 rows)
+
+SELECT schemaname, sequencename, start_value, min_value, max_value, increment_by, cycle, cache_size, last_value
+FROM pg_sequences
+WHERE sequencename IN
+ ('sequence_test2', 'sequence_test3', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
+ 'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq')
+ ORDER BY sequencename ASC;
+ schemaname | sequencename | start_value | min_value | max_value | increment_by | cycle | cache_size | last_value
+------------+--------------------+-------------+-----------+---------------------+--------------+-------+------------+------------
+ public | sequence_test2 | 32 | 5 | 36 | 4 | t | 1 | 5
+ public | sequence_test3 | 1 | 1 | 9223372036854775807 | 1 | f | 1 |
+ public | serialtest2_f2_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f3_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f4_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f5_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+ public | serialtest2_f6_seq | 1 | 1 | 9223372036854775807 | 1 | f | 1 | 2
+(7 rows)
-- Test comments
COMMENT ON SEQUENCE asdf IS 'won''t work';
diff --git a/src/test/regress/sql/sequence.sql b/src/test/regress/sql/sequence.sql
index 98a2e7db36..e2b6b63393 100644
--- a/src/test/regress/sql/sequence.sql
+++ b/src/test/regress/sql/sequence.sql
@@ -138,12 +138,23 @@ SELECT nextval('sequence_test2');
SELECT nextval('sequence_test2');
SELECT nextval('sequence_test2');
+
+CREATE SEQUENCE sequence_test3; -- not read from, to test is_called
+
+
-- Information schema
SELECT * FROM information_schema.sequences WHERE sequence_name IN
- ('sequence_test2', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
+ ('sequence_test2', 'sequence_test3', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq')
ORDER BY sequence_name ASC;
+SELECT schemaname, sequencename, start_value, min_value, max_value, increment_by, cycle, cache_size, last_value
+FROM pg_sequences
+WHERE sequencename IN
+ ('sequence_test2', 'sequence_test3', 'serialtest2_f2_seq', 'serialtest2_f3_seq',
+ 'serialtest2_f4_seq', 'serialtest2_f5_seq', 'serialtest2_f6_seq')
+ ORDER BY sequencename ASC;
+
-- Test comments
COMMENT ON SEQUENCE asdf IS 'won''t work';
COMMENT ON SEQUENCE sequence_test2 IS 'will work';
--
cgit v1.2.3
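A rough sketch of how the machinery added above behaves, assuming a
hypothetical sequence named demo_seq (column names follow the regression
tests; unlike lastval(), which reports the session's most recent nextval()
result, pg_sequence_last_value() reports a given sequence's own state):

CREATE SEQUENCE demo_seq CACHE 20;

-- is_called is false before any nextval(), so last_value reads as NULL
SELECT cache_size, last_value FROM pg_sequences
WHERE sequencename = 'demo_seq';

SELECT nextval('demo_seq');

-- now the function returns the last value allocated by the sequence
SELECT pg_sequence_last_value('demo_seq'::regclass);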
From 1c7861e81b4220364bef75d2445e9c0619f3f3f8 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 22 Nov 2016 14:02:52 -0500
Subject: Doc: add a section in Part II concerning RETURNING.
There are assorted references to RETURNING in Part II, but nothing
that would qualify as an explanation of the feature, which seems
like an oversight considering how useful it is. Add something.
Noted while looking for a place to point a cross-reference to ...
---
doc/src/sgml/dml.sgml | 99 +++++++++++++++++++++++++++++++++++++++++++++++
doc/src/sgml/queries.sgml | 3 +-
2 files changed, 101 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/dml.sgml b/doc/src/sgml/dml.sgml
index cd36a73811..0c65578b59 100644
--- a/doc/src/sgml/dml.sgml
+++ b/doc/src/sgml/dml.sgml
@@ -102,6 +102,18 @@ INSERT INTO products (product_no, name, price) VALUES
+
+ It is also possible to insert the result of a query (which might be no
+ rows, one row, or many rows):
+
+INSERT INTO products (product_no, name, price)
+ SELECT product_no, name, price FROM new_products
+ WHERE release_date = 'today';
+
+ This provides the full power of the SQL query mechanism () for computing the rows to be inserted.
+
+
When inserting a lot of data at the same time, consider using
@@ -252,4 +264,91 @@ DELETE FROM products;
then all rows in the table will be deleted! Caveat programmer.
+
+
+ Returning Data From Modified Rows
+
+
+ RETURNING
+
+
+
+ INSERT
+ RETURNING
+
+
+
+ UPDATE
+ RETURNING
+
+
+
+ DELETE
+ RETURNING
+
+
+
+ Sometimes it is useful to obtain data from modified rows while they are
+ being manipulated. The INSERT>, UPDATE>,
+ and DELETE> commands all have an
+ optional RETURNING> clause that supports this. Use
+ of RETURNING> avoids performing an extra database query to
+ collect the data, and is especially valuable when it would otherwise be
+ difficult to identify the modified rows reliably.
+
+
+
+ The allowed contents of a RETURNING> clause are the same as
+ a SELECT> command's output list
+ (see ). It can contain column
+ names of the command's target table, or value expressions using those
+ columns. A common shorthand is RETURNING *>, which selects
+ all columns of the target table in order.
+
+
+
+ In an INSERT>, the data available to RETURNING> is
+ the row as it was inserted. This is not so useful in trivial inserts,
+ since it would just repeat the data provided by the client. But it can
+ be very handy when relying on computed default values. For example,
+ when using a serial>
+ column to provide unique identifiers, RETURNING> can return
+ the ID assigned to a new row:
+
+CREATE TABLE users (firstname text, lastname text, id serial primary key);
+
+INSERT INTO users (firstname, lastname) VALUES ('Joe', 'Cool') RETURNING id;
+
+ The RETURNING> clause is also very useful
+ with INSERT ... SELECT>.
+
+
+
+ In an UPDATE>, the data available to RETURNING> is
+ the new content of the modified row. For example:
+
+UPDATE products SET price = price * 1.10
+ WHERE price <= 99.99
+ RETURNING name, price AS new_price;
+
+
+
+
+ In a DELETE>, the data available to RETURNING> is
+ the content of the deleted row. For example:
+
+DELETE FROM products
+ WHERE obsoletion_date = 'today'
+ RETURNING *;
+
+
+
+
+ If there are triggers () on the target table,
+ the data available to RETURNING> is the row as modified by
+ the triggers. Thus, inspecting columns computed by triggers is another
+ common use-case for RETURNING>.
+
+
+
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 718262f1aa..548f835830 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -2262,7 +2262,8 @@ SELECT * FROM moved_rows;
Data-modifying statements in WITH> usually have
- RETURNING> clauses, as seen in the example above.
+ RETURNING> clauses (see ),
+ as shown in the example above.
It is the output of the RETURNING> clause, not> the
target table of the data-modifying statement, that forms the temporary
table that can be referred to by the rest of the query. If a
--
cgit v1.2.3
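The new section notes that RETURNING combines well with INSERT ... SELECT
but stops short of an example. A minimal sketch, reusing the products and
new_products tables from the patch above:

INSERT INTO products (product_no, name, price)
  SELECT product_no, name, price FROM new_products
    WHERE release_date = 'today'
  RETURNING product_no, name, price;

-- Each inserted row comes back to the client, so the application sees
-- exactly which rows were copied without issuing a second query.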
From 906bfcad7ba7cb3863fe0e2a7810be8e3cd84fbd Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 22 Nov 2016 15:19:57 -0500
Subject: Improve handling of "UPDATE ... SET (column_list) = row_constructor".
Previously, the right-hand side of a multiple-column assignment, if it
wasn't a sub-SELECT, had to be a simple parenthesized expression list,
because gram.y was responsible for "bursting" the construct into
independent column assignments. This had the minor defect that you
couldn't write ROW (though you should be able to, since the standard says
this is a row constructor), and the rather larger defect that, unlike other
uses of row constructors, we would not expand a "foo.*" item into multiple
columns.
Fix that by changing the RHS to be just "a_expr" in the grammar, leaving
it to transformMultiAssignRef to separate the elements of a RowExpr;
which it will do only after performing standard transformation of the
RowExpr, so that "foo.*" behaves as expected.
The key reason we didn't do that before was the hard-wired handling of
DEFAULT tokens (SetToDefault nodes). This patch deals with that issue by
allowing DEFAULT in any a_expr and having parse analysis throw an error
if SetToDefault is found in an unexpected place. That's an improvement
anyway since the error can be more specific than just "syntax error".
The SQL standard suggests that the RHS could be any a_expr yielding a
suitable row value. This patch doesn't really move the goal posts in that
respect --- you're still limited to RowExpr or a sub-SELECT --- but it does
fix the grammar restriction, so it provides some tangible progress towards
a full implementation. And the limitation is now documented by an explicit
error message rather than an unhelpful "syntax error".
Discussion: <8542.1479742008@sss.pgh.pa.us>
---
doc/src/sgml/ref/update.sgml | 14 +--
src/backend/parser/analyze.c | 35 ++++---
src/backend/parser/gram.y | 125 ++++++------------------
src/backend/parser/parse_expr.c | 181 ++++++++++++++++++++++++++---------
src/backend/parser/parse_target.c | 30 +++++-
src/include/parser/parse_target.h | 2 +-
src/test/regress/expected/update.out | 18 ++--
src/test/regress/sql/update.sql | 7 +-
8 files changed, 230 insertions(+), 182 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index c50434f85f..2de0f4aad1 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -24,7 +24,7 @@ PostgreSQL documentation
[ WITH [ RECURSIVE ] with_query [, ...] ]
UPDATE [ ONLY ] table_name [ * ] [ [ AS ] alias ]
SET { column_name = { expression | DEFAULT } |
- ( column_name [, ...] ) = ( { expression | DEFAULT } [, ...] ) |
+ ( column_name [, ...] ) = [ ROW ] ( { expression | DEFAULT } [, ...] ) |
( column_name [, ...] ) = ( sub-SELECT )
} [, ...]
[ FROM from_list ]
@@ -420,12 +420,12 @@ UPDATE films SET kind = 'Dramatic' WHERE CURRENT OF c_films;
According to the standard, the source value for a parenthesized sub-list of
- column names can be any row-valued expression yielding the correct number
- of columns. PostgreSQL only allows the source
- value to be a parenthesized list of expressions (a row constructor) or a
- sub-SELECT>. An individual column's updated value can be
- specified as DEFAULT> in the row-constructor case, but not
- inside a sub-SELECT>.
+ target column names can be any row-valued expression yielding the correct
+ number of columns. PostgreSQL only allows the
+ source value to be a row
+ constructor or a sub-SELECT>. An individual column's
+ updated value can be specified as DEFAULT> in the
+ row-constructor case, but not inside a sub-SELECT>.
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 6901e08fd9..36f8c548eb 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -644,8 +644,12 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
{
List *sublist = (List *) lfirst(lc);
- /* Do basic expression transformation (same as a ROW() expr) */
- sublist = transformExpressionList(pstate, sublist, EXPR_KIND_VALUES);
+ /*
+ * Do basic expression transformation (same as a ROW() expr, but
+ * allow SetToDefault at top level)
+ */
+ sublist = transformExpressionList(pstate, sublist,
+ EXPR_KIND_VALUES, true);
/*
* All the sublists must be the same length, *after*
@@ -752,10 +756,14 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
Assert(list_length(valuesLists) == 1);
Assert(selectStmt->intoClause == NULL);
- /* Do basic expression transformation (same as a ROW() expr) */
+ /*
+ * Do basic expression transformation (same as a ROW() expr, but allow
+ * SetToDefault at top level)
+ */
exprList = transformExpressionList(pstate,
(List *) linitial(valuesLists),
- EXPR_KIND_VALUES);
+ EXPR_KIND_VALUES,
+ true);
/* Prepare row for assignment to target table */
exprList = transformInsertRow(pstate, exprList,
@@ -1293,9 +1301,7 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
}
/*
- * For each row of VALUES, transform the raw expressions. This is also a
- * handy place to reject DEFAULT nodes, which the grammar allows for
- * simplicity.
+ * For each row of VALUES, transform the raw expressions.
*
* Note that the intermediate representation we build is column-organized
* not row-organized. That simplifies the type and collation processing
@@ -1305,8 +1311,12 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
{
List *sublist = (List *) lfirst(lc);
- /* Do basic expression transformation (same as a ROW() expr) */
- sublist = transformExpressionList(pstate, sublist, EXPR_KIND_VALUES);
+ /*
+ * Do basic expression transformation (same as a ROW() expr, but here
+ * we disallow SetToDefault)
+ */
+ sublist = transformExpressionList(pstate, sublist,
+ EXPR_KIND_VALUES, false);
/*
* All the sublists must be the same length, *after* transformation
@@ -1329,17 +1339,12 @@ transformValuesClause(ParseState *pstate, SelectStmt *stmt)
exprLocation((Node *) sublist))));
}
- /* Check for DEFAULT and build per-column expression lists */
+ /* Build per-column expression lists */
i = 0;
foreach(lc2, sublist)
{
Node *col = (Node *) lfirst(lc2);
- if (IsA(col, SetToDefault))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("DEFAULT can only appear in a VALUES list within INSERT"),
- parser_errposition(pstate, exprLocation(col))));
colexprs[i] = lappend(colexprs[i], col);
i++;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0ec1cd345b..367bc2ecff 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -365,8 +365,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
qualified_name_list any_name any_name_list type_name_list
any_operator expr_list attrs
target_list opt_target_list insert_column_list set_target_list
- set_clause_list set_clause multiple_set_clause
- ctext_expr_list ctext_row def_list operator_def_list indirection opt_indirection
+ set_clause_list set_clause
+ def_list operator_def_list indirection opt_indirection
reloption_list group_clause TriggerFuncArgs select_limit
opt_select_limit opclass_item_list opclass_drop_list
opclass_purpose opt_opfamily transaction_mode_list_or_empty
@@ -454,7 +454,6 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type case_expr case_arg when_clause case_default
%type when_clause_list
%type sub_type
-%type ctext_expr
%type NumericOnly
%type NumericOnly_list
%type alias_clause opt_alias_clause
@@ -466,7 +465,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type relation_expr
%type relation_expr_opt_alias
%type tablesample_clause opt_repeatable_clause
-%type target_el single_set_clause set_target insert_column_item
+%type target_el set_target insert_column_item
%type generic_option_name
%type generic_option_arg
@@ -9914,75 +9913,24 @@ set_clause_list:
;
set_clause:
- single_set_clause { $$ = list_make1($1); }
- | multiple_set_clause { $$ = $1; }
- ;
-
-single_set_clause:
- set_target '=' ctext_expr
- {
- $$ = $1;
- $$->val = (Node *) $3;
- }
- ;
-
-/*
- * Ideally, we'd accept any row-valued a_expr as RHS of a multiple_set_clause.
- * However, per SQL spec the row-constructor case must allow DEFAULT as a row
- * member, and it's pretty unclear how to do that (unless perhaps we allow
- * DEFAULT in any a_expr and let parse analysis sort it out later?). For the
- * moment, the planner/executor only support a subquery as a multiassignment
- * source anyhow, so we need only accept ctext_row and subqueries here.
- */
-multiple_set_clause:
- '(' set_target_list ')' '=' ctext_row
+ set_target '=' a_expr
{
- ListCell *col_cell;
- ListCell *val_cell;
-
- /*
- * Break the ctext_row apart, merge individual expressions
- * into the destination ResTargets. This is semantically
- * equivalent to, and much cheaper to process than, the
- * general case.
- */
- if (list_length($2) != list_length($5))
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("number of columns does not match number of values"),
- parser_errposition(@5)));
- forboth(col_cell, $2, val_cell, $5)
- {
- ResTarget *res_col = (ResTarget *) lfirst(col_cell);
- Node *res_val = (Node *) lfirst(val_cell);
-
- res_col->val = res_val;
- }
-
- $$ = $2;
+ $1->val = (Node *) $3;
+ $$ = list_make1($1);
}
- | '(' set_target_list ')' '=' select_with_parens
+ | '(' set_target_list ')' '=' a_expr
{
- SubLink *sl = makeNode(SubLink);
int ncolumns = list_length($2);
int i = 1;
ListCell *col_cell;
- /* First, convert bare SelectStmt into a SubLink */
- sl->subLinkType = MULTIEXPR_SUBLINK;
- sl->subLinkId = 0; /* will be assigned later */
- sl->testexpr = NULL;
- sl->operName = NIL;
- sl->subselect = $5;
- sl->location = @5;
-
/* Create a MultiAssignRef source for each target */
foreach(col_cell, $2)
{
ResTarget *res_col = (ResTarget *) lfirst(col_cell);
MultiAssignRef *r = makeNode(MultiAssignRef);
- r->source = (Node *) sl;
+ r->source = (Node *) $5;
r->colno = i;
r->ncolumns = ncolumns;
res_col->val = (Node *) r;
@@ -10641,17 +10589,22 @@ locked_rels_list:
;
+/*
+ * We should allow ROW '(' expr_list ')' too, but that seems to require
+ * making VALUES a fully reserved word, which will probably break more apps
+ * than allowing the noise-word is worth.
+ */
values_clause:
- VALUES ctext_row
+ VALUES '(' expr_list ')'
{
SelectStmt *n = makeNode(SelectStmt);
- n->valuesLists = list_make1($2);
+ n->valuesLists = list_make1($3);
$$ = (Node *) n;
}
- | values_clause ',' ctext_row
+ | values_clause ',' '(' expr_list ')'
{
SelectStmt *n = (SelectStmt *) $1;
- n->valuesLists = lappend(n->valuesLists, $3);
+ n->valuesLists = lappend(n->valuesLists, $4);
$$ = (Node *) n;
}
;
@@ -12042,6 +11995,20 @@ a_expr: c_expr { $$ = $1; }
list_make1($1), @2),
@2);
}
+ | DEFAULT
+ {
+ /*
+ * The SQL spec only allows DEFAULT in "contextually typed
+ * expressions", but for us, it's easier to allow it in
+ * any a_expr and then throw error during parse analysis
+ * if it's in an inappropriate context. This way also
+ * lets us say something smarter than "syntax error".
+ */
+ SetToDefault *n = makeNode(SetToDefault);
+ /* parse analysis will fill in the rest */
+ n->location = @1;
+ $$ = (Node *)n;
+ }
;
/*
@@ -13297,36 +13264,6 @@ opt_asymmetric: ASYMMETRIC
| /*EMPTY*/
;
-/*
- * The SQL spec defines "contextually typed value expressions" and
- * "contextually typed row value constructors", which for our purposes
- * are the same as "a_expr" and "row" except that DEFAULT can appear at
- * the top level.
- */
-
-ctext_expr:
- a_expr { $$ = (Node *) $1; }
- | DEFAULT
- {
- SetToDefault *n = makeNode(SetToDefault);
- n->location = @1;
- $$ = (Node *) n;
- }
- ;
-
-ctext_expr_list:
- ctext_expr { $$ = list_make1($1); }
- | ctext_expr_list ',' ctext_expr { $$ = lappend($1, $3); }
- ;
-
-/*
- * We should allow ROW '(' ctext_expr_list ')' too, but that seems to require
- * making VALUES a fully reserved word, which will probably break more apps
- * than allowing the noise-word is worth.
- */
-ctext_row: '(' ctext_expr_list ')' { $$ = $2; }
- ;
-
/*****************************************************************************
*
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 63f7965532..17d1cbf8b3 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -106,7 +106,7 @@ static Node *transformCaseExpr(ParseState *pstate, CaseExpr *c);
static Node *transformSubLink(ParseState *pstate, SubLink *sublink);
static Node *transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
Oid array_type, Oid element_type, int32 typmod);
-static Node *transformRowExpr(ParseState *pstate, RowExpr *r);
+static Node *transformRowExpr(ParseState *pstate, RowExpr *r, bool allowDefault);
static Node *transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c);
static Node *transformMinMaxExpr(ParseState *pstate, MinMaxExpr *m);
static Node *transformSQLValueFunction(ParseState *pstate,
@@ -299,7 +299,7 @@ transformExprRecurse(ParseState *pstate, Node *expr)
break;
case T_RowExpr:
- result = transformRowExpr(pstate, (RowExpr *) expr);
+ result = transformRowExpr(pstate, (RowExpr *) expr, false);
break;
case T_CoalesceExpr:
@@ -348,8 +348,20 @@ transformExprRecurse(ParseState *pstate, Node *expr)
break;
/*
- * CaseTestExpr and SetToDefault don't require any processing;
- * they are only injected into parse trees in fully-formed state.
+ * In all places where DEFAULT is legal, the caller should have
+ * processed it rather than passing it to transformExpr().
+ */
+ case T_SetToDefault:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("DEFAULT is not allowed in this context"),
+ parser_errposition(pstate,
+ ((SetToDefault *) expr)->location)));
+ break;
+
+ /*
+ * CaseTestExpr doesn't require any processing; it is only
+ * injected into parse trees in a fully-formed state.
*
* Ordinarily we should not see a Var here, but it is convenient
* for transformJoinUsingClause() to create untransformed operator
@@ -358,7 +370,6 @@ transformExprRecurse(ParseState *pstate, Node *expr)
* references, which seems expensively pointless. So allow it.
*/
case T_CaseTestExpr:
- case T_SetToDefault:
case T_Var:
{
result = (Node *) expr;
@@ -1486,9 +1497,9 @@ static Node *
transformMultiAssignRef(ParseState *pstate, MultiAssignRef *maref)
{
SubLink *sublink;
+ RowExpr *rexpr;
Query *qtree;
TargetEntry *tle;
- Param *param;
/* We should only see this in first-stage processing of UPDATE tlists */
Assert(pstate->p_expr_kind == EXPR_KIND_UPDATE_SOURCE);
@@ -1496,64 +1507,139 @@ transformMultiAssignRef(ParseState *pstate, MultiAssignRef *maref)
/* We only need to transform the source if this is the first column */
if (maref->colno == 1)
{
- sublink = (SubLink *) transformExprRecurse(pstate, maref->source);
- /* Currently, the grammar only allows a SubLink as source */
- Assert(IsA(sublink, SubLink));
- Assert(sublink->subLinkType == MULTIEXPR_SUBLINK);
- qtree = (Query *) sublink->subselect;
- Assert(IsA(qtree, Query));
-
- /* Check subquery returns required number of columns */
- if (count_nonjunk_tlist_entries(qtree->targetList) != maref->ncolumns)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("number of columns does not match number of values"),
- parser_errposition(pstate, sublink->location)));
-
/*
- * Build a resjunk tlist item containing the MULTIEXPR SubLink, and
- * add it to pstate->p_multiassign_exprs, whence it will later get
- * appended to the completed targetlist. We needn't worry about
- * selecting a resno for it; transformUpdateStmt will do that.
+ * For now, we only allow EXPR SubLinks and RowExprs as the source of
+ * an UPDATE multiassignment. This is sufficient to cover interesting
+ * cases; at worst, someone would have to write (SELECT * FROM expr)
+ * to expand a composite-returning expression of another form.
*/
- tle = makeTargetEntry((Expr *) sublink, 0, NULL, true);
- pstate->p_multiassign_exprs = lappend(pstate->p_multiassign_exprs, tle);
+ if (IsA(maref->source, SubLink) &&
+ ((SubLink *) maref->source)->subLinkType == EXPR_SUBLINK)
+ {
+ /* Relabel it as a MULTIEXPR_SUBLINK */
+ sublink = (SubLink *) maref->source;
+ sublink->subLinkType = MULTIEXPR_SUBLINK;
+ /* And transform it */
+ sublink = (SubLink *) transformExprRecurse(pstate,
+ (Node *) sublink);
+
+ qtree = (Query *) sublink->subselect;
+ Assert(IsA(qtree, Query));
+
+ /* Check subquery returns required number of columns */
+ if (count_nonjunk_tlist_entries(qtree->targetList) != maref->ncolumns)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("number of columns does not match number of values"),
+ parser_errposition(pstate, sublink->location)));
- /*
- * Assign a unique-within-this-targetlist ID to the MULTIEXPR SubLink.
- * We can just use its position in the p_multiassign_exprs list.
- */
- sublink->subLinkId = list_length(pstate->p_multiassign_exprs);
+ /*
+ * Build a resjunk tlist item containing the MULTIEXPR SubLink,
+ * and add it to pstate->p_multiassign_exprs, whence it will later
+ * get appended to the completed targetlist. We needn't worry
+ * about selecting a resno for it; transformUpdateStmt will do
+ * that.
+ */
+ tle = makeTargetEntry((Expr *) sublink, 0, NULL, true);
+ pstate->p_multiassign_exprs = lappend(pstate->p_multiassign_exprs,
+ tle);
+
+ /*
+ * Assign a unique-within-this-targetlist ID to the MULTIEXPR
+ * SubLink. We can just use its position in the
+ * p_multiassign_exprs list.
+ */
+ sublink->subLinkId = list_length(pstate->p_multiassign_exprs);
+ }
+ else if (IsA(maref->source, RowExpr))
+ {
+ /* Transform the RowExpr, allowing SetToDefault items */
+ rexpr = (RowExpr *) transformRowExpr(pstate,
+ (RowExpr *) maref->source,
+ true);
+
+ /* Check it returns required number of columns */
+ if (list_length(rexpr->args) != maref->ncolumns)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("number of columns does not match number of values"),
+ parser_errposition(pstate, rexpr->location)));
+
+ /*
+ * Temporarily append it to p_multiassign_exprs, so we can get it
+ * back when we come back here for additional columns.
+ */
+ tle = makeTargetEntry((Expr *) rexpr, 0, NULL, true);
+ pstate->p_multiassign_exprs = lappend(pstate->p_multiassign_exprs,
+ tle);
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("source for a multiple-column UPDATE item must be a sub-SELECT or ROW() expression"),
+ parser_errposition(pstate, exprLocation(maref->source))));
}
else
{
/*
* Second or later column in a multiassignment. Re-fetch the
- * transformed query, which we assume is still the last entry in
- * p_multiassign_exprs.
+ * transformed SubLink or RowExpr, which we assume is still the last
+ * entry in p_multiassign_exprs.
*/
Assert(pstate->p_multiassign_exprs != NIL);
tle = (TargetEntry *) llast(pstate->p_multiassign_exprs);
+ }
+
+ /*
+ * Emit the appropriate output expression for the current column
+ */
+ if (IsA(tle->expr, SubLink))
+ {
+ Param *param;
+
sublink = (SubLink *) tle->expr;
- Assert(IsA(sublink, SubLink));
Assert(sublink->subLinkType == MULTIEXPR_SUBLINK);
qtree = (Query *) sublink->subselect;
Assert(IsA(qtree, Query));
+
+ /* Build a Param representing the current subquery output column */
+ tle = (TargetEntry *) list_nth(qtree->targetList, maref->colno - 1);
+ Assert(!tle->resjunk);
+
+ param = makeNode(Param);
+ param->paramkind = PARAM_MULTIEXPR;
+ param->paramid = (sublink->subLinkId << 16) | maref->colno;
+ param->paramtype = exprType((Node *) tle->expr);
+ param->paramtypmod = exprTypmod((Node *) tle->expr);
+ param->paramcollid = exprCollation((Node *) tle->expr);
+ param->location = exprLocation((Node *) tle->expr);
+
+ return (Node *) param;
}
- /* Build a Param representing the appropriate subquery output column */
- tle = (TargetEntry *) list_nth(qtree->targetList, maref->colno - 1);
- Assert(!tle->resjunk);
+ if (IsA(tle->expr, RowExpr))
+ {
+ Node *result;
+
+ rexpr = (RowExpr *) tle->expr;
- param = makeNode(Param);
- param->paramkind = PARAM_MULTIEXPR;
- param->paramid = (sublink->subLinkId << 16) | maref->colno;
- param->paramtype = exprType((Node *) tle->expr);
- param->paramtypmod = exprTypmod((Node *) tle->expr);
- param->paramcollid = exprCollation((Node *) tle->expr);
- param->location = exprLocation((Node *) tle->expr);
+ /* Just extract and return the next element of the RowExpr */
+ result = (Node *) list_nth(rexpr->args, maref->colno - 1);
+
+ /*
+ * If we're at the last column, delete the RowExpr from
+ * p_multiassign_exprs; we don't need it anymore, and don't want it in
+ * the finished UPDATE tlist.
+ */
+ if (maref->colno == maref->ncolumns)
+ pstate->p_multiassign_exprs =
+ list_delete_ptr(pstate->p_multiassign_exprs, tle);
+
+ return result;
+ }
- return (Node *) param;
+ elog(ERROR, "unexpected expr type in multiassign list");
+ return NULL; /* keep compiler quiet */
}
static Node *
@@ -2081,7 +2167,7 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
}
static Node *
-transformRowExpr(ParseState *pstate, RowExpr *r)
+transformRowExpr(ParseState *pstate, RowExpr *r, bool allowDefault)
{
RowExpr *newr;
char fname[16];
@@ -2091,7 +2177,8 @@ transformRowExpr(ParseState *pstate, RowExpr *r)
newr = makeNode(RowExpr);
/* Transform the field expressions */
- newr->args = transformExpressionList(pstate, r->args, pstate->p_expr_kind);
+ newr->args = transformExpressionList(pstate, r->args,
+ pstate->p_expr_kind, allowDefault);
/* Barring later casting, we consider the type RECORD */
newr->row_typeid = RECORDOID;
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index a76c33f40e..d440dec556 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -91,7 +91,17 @@ transformTargetEntry(ParseState *pstate,
{
/* Transform the node if caller didn't do it already */
if (expr == NULL)
- expr = transformExpr(pstate, node, exprKind);
+ {
+ /*
+ * If it's a SetToDefault node and we should allow that, pass it
+ * through unmodified. (transformExpr will throw the appropriate
+ * error if we're disallowing it.)
+ */
+ if (exprKind == EXPR_KIND_UPDATE_SOURCE && IsA(node, SetToDefault))
+ expr = node;
+ else
+ expr = transformExpr(pstate, node, exprKind);
+ }
if (colname == NULL && !resjunk)
{
@@ -210,10 +220,13 @@ transformTargetList(ParseState *pstate, List *targetlist,
* the input list elements are bare expressions without ResTarget decoration,
* and the output elements are likewise just expressions without TargetEntry
* decoration. We use this for ROW() and VALUES() constructs.
+ *
+ * exprKind is not enough to tell us whether to allow SetToDefault, so
+ * an additional flag is needed for that.
*/
List *
transformExpressionList(ParseState *pstate, List *exprlist,
- ParseExprKind exprKind)
+ ParseExprKind exprKind, bool allowDefault)
{
List *result = NIL;
ListCell *lc;
@@ -255,10 +268,17 @@ transformExpressionList(ParseState *pstate, List *exprlist,
}
/*
- * Not "something.*", so transform as a single expression
+ * Not "something.*", so transform as a single expression. If it's a
+ * SetToDefault node and we should allow that, pass it through
+ * unmodified. (transformExpr will throw the appropriate error if
+ * we're disallowing it.)
*/
- result = lappend(result,
- transformExpr(pstate, e, exprKind));
+ if (allowDefault && IsA(e, SetToDefault))
+ /* do nothing */ ;
+ else
+ e = transformExpr(pstate, e, exprKind);
+
+ result = lappend(result, e);
}
/* Shouldn't have any multiassign items here */
diff --git a/src/include/parser/parse_target.h b/src/include/parser/parse_target.h
index 8d4ad60026..f85c618c43 100644
--- a/src/include/parser/parse_target.h
+++ b/src/include/parser/parse_target.h
@@ -20,7 +20,7 @@
extern List *transformTargetList(ParseState *pstate, List *targetlist,
ParseExprKind exprKind);
extern List *transformExpressionList(ParseState *pstate, List *exprlist,
- ParseExprKind exprKind);
+ ParseExprKind exprKind, bool allowDefault);
extern void markTargetListOrigins(ParseState *pstate, List *targetlist);
extern TargetEntry *transformTargetEntry(ParseState *pstate,
Node *node, Node *expr, ParseExprKind exprKind,
diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out
index 49730ea3c5..609899e1f7 100644
--- a/src/test/regress/expected/update.out
+++ b/src/test/regress/expected/update.out
@@ -140,17 +140,15 @@ SELECT * FROM update_test;
| |
(4 rows)
--- these should work, but don't yet:
-UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 100)) AS v(i, j)
+-- *-expansion should work in this context:
+UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 100)) AS v(i, j)
WHERE update_test.a = v.i;
-ERROR: number of columns does not match number of values
-LINE 1: UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 100)) ...
- ^
-UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 101)) AS v(i, j)
+-- you might expect this to work, but syntactically it's not a RowExpr:
+UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) AS v(i, j)
WHERE update_test.a = v.i;
-ERROR: syntax error at or near "ROW"
-LINE 1: UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 101...
- ^
+ERROR: source for a multiple-column UPDATE item must be a sub-SELECT or ROW() expression
+LINE 1: UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) ...
+ ^
-- if an alias for the target table is specified, don't allow references
-- to the original table name
UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10;
@@ -163,8 +161,8 @@ UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car';
SELECT a, b, char_length(c) FROM update_test;
a | b | char_length
----+-----+-------------
- 21 | 101 |
| |
+ 21 | 100 |
41 | 12 | 10000
42 | 12 | 10000
(4 rows)
diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql
index e0cf5d12a9..ad58273b38 100644
--- a/src/test/regress/sql/update.sql
+++ b/src/test/regress/sql/update.sql
@@ -74,10 +74,11 @@ UPDATE update_test SET (b,a) = (select a+1,b from update_test);
UPDATE update_test SET (b,a) = (select a+1,b from update_test where a = 1000)
WHERE a = 11;
SELECT * FROM update_test;
--- these should work, but don't yet:
-UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 100)) AS v(i, j)
+-- *-expansion should work in this context:
+UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 100)) AS v(i, j)
WHERE update_test.a = v.i;
-UPDATE update_test SET (a,b) = ROW(v.*) FROM (VALUES(21, 101)) AS v(i, j)
+-- you might expect this to work, but syntactically it's not a RowExpr:
+UPDATE update_test SET (a,b) = (v.*) FROM (VALUES(21, 101)) AS v(i, j)
WHERE update_test.a = v.i;
-- if an alias for the target table is specified, don't allow references
--
cgit v1.2.3
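A sketch of the forms this commit accepts, written against the update_test
table from the regression tests above (the literal values are illustrative):

-- the ROW keyword now parses, and DEFAULT is legal as a row member:
UPDATE update_test SET (a, b) = ROW(10, DEFAULT) WHERE a = 41;

-- "foo.*" now expands inside the row constructor:
UPDATE update_test SET (a, b) = ROW(v.*)
  FROM (VALUES (21, 100)) AS v(i, j)
  WHERE update_test.a = v.i;

-- a sub-SELECT source remains the other supported form:
UPDATE update_test SET (b, a) = (SELECT a + 1, b FROM update_test WHERE a = 11)
  WHERE a = 11;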
From 9a1d0af4ad2cbd419115b453d811c141b80d872b Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 22 Nov 2016 15:32:13 -0500
Subject: Code review for commit 274bb2b3857cc987cfa21d14775cae9b0dababa5.
Avoid memory leak in conninfo_uri_parse_options. Use the current host
rather than the comma-separated list of host names when the host name
is needed for GSS, SSPI, or SSL authentication. Document the way
connect_timeout interacts with multiple host specifications.
Takayuki Tsunakawa
---
doc/src/sgml/libpq.sgml | 4 ++++
src/interfaces/libpq/fe-auth.c | 12 +++++++-----
src/interfaces/libpq/fe-connect.c | 9 +++++----
src/interfaces/libpq/fe-secure-openssl.c | 12 +++++++-----
4 files changed, 23 insertions(+), 14 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index d04dba7493..0f375bf5f2 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1009,6 +1009,10 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
Maximum wait for connection, in seconds (write as a decimal integer
string). Zero or not specified means wait indefinitely. It is not
recommended to use a timeout of less than 2 seconds.
+ This timeout applies separately to each connection attempt.
+ For example, if you specify two hosts and both of them are unreachable,
+ and connect_timeout> is 5, the total time spent waiting for a
+ connection might be up to 10 seconds.
diff --git a/src/interfaces/libpq/fe-auth.c b/src/interfaces/libpq/fe-auth.c
index 19171fb676..d861dc487b 100644
--- a/src/interfaces/libpq/fe-auth.c
+++ b/src/interfaces/libpq/fe-auth.c
@@ -170,8 +170,9 @@ pg_GSS_startup(PGconn *conn)
min_stat;
int maxlen;
gss_buffer_desc temp_gbuf;
+ char *host = PQhost(conn);
- if (!(conn->pghost && conn->pghost[0] != '\0'))
+ if (!(host && host[0] != '\0'))
{
printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("host name must be specified\n"));
@@ -198,7 +199,7 @@ pg_GSS_startup(PGconn *conn)
return STATUS_ERROR;
}
snprintf(temp_gbuf.value, maxlen, "%s@%s",
- conn->krbsrvname, conn->pghost);
+ conn->krbsrvname, host);
temp_gbuf.length = strlen(temp_gbuf.value);
maj_stat = gss_import_name(&min_stat, &temp_gbuf,
@@ -371,6 +372,7 @@ pg_SSPI_startup(PGconn *conn, int use_negotiate)
{
SECURITY_STATUS r;
TimeStamp expire;
+ char *host = PQhost(conn);
conn->sspictx = NULL;
@@ -406,19 +408,19 @@ pg_SSPI_startup(PGconn *conn, int use_negotiate)
* but not more complex. We can skip the @REALM part, because Windows will
* fill that in for us automatically.
*/
- if (!(conn->pghost && conn->pghost[0] != '\0'))
+ if (!(host && host[0] != '\0'))
{
printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("host name must be specified\n"));
return STATUS_ERROR;
}
- conn->sspitarget = malloc(strlen(conn->krbsrvname) + strlen(conn->pghost) + 2);
+ conn->sspitarget = malloc(strlen(conn->krbsrvname) + strlen(host) + 2);
if (!conn->sspitarget)
{
printfPQExpBuffer(&conn->errorMessage, libpq_gettext("out of memory\n"));
return STATUS_ERROR;
}
- sprintf(conn->sspitarget, "%s/%s", conn->krbsrvname, conn->pghost);
+ sprintf(conn->sspitarget, "%s/%s", conn->krbsrvname, host);
/*
* Indicate that we're in SSPI authentication mode to make sure that
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index ae85db9dd5..3e9c45bc40 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -4931,7 +4931,7 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri,
{
int prefix_len;
char *p;
- char *buf;
+ char *buf = NULL;
char *start;
char prevchar = '\0';
char *user = NULL;
@@ -4946,7 +4946,7 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri,
{
printfPQExpBuffer(errorMessage,
libpq_gettext("out of memory\n"));
- return false;
+ goto cleanup;
}
/* need a modifiable copy of the input URI */
@@ -4955,7 +4955,7 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri,
{
printfPQExpBuffer(errorMessage,
libpq_gettext("out of memory\n"));
- return false;
+ goto cleanup;
}
start = buf;
@@ -5156,7 +5156,8 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri,
cleanup:
termPQExpBuffer(&hostbuf);
termPQExpBuffer(&portbuf);
- free(buf);
+ if (buf)
+ free(buf);
return retval;
}
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index f474c96f5f..7bdf92701a 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -483,6 +483,7 @@ verify_peer_name_matches_certificate_name(PGconn *conn, ASN1_STRING *name_entry,
char *name;
const unsigned char *namedata;
int result;
+ char *host = PQhost(conn);
*store_name = NULL;
@@ -528,12 +529,12 @@ verify_peer_name_matches_certificate_name(PGconn *conn, ASN1_STRING *name_entry,
return -1;
}
- if (pg_strcasecmp(name, conn->pghost) == 0)
+ if (pg_strcasecmp(name, host) == 0)
{
/* Exact name match */
result = 1;
}
- else if (wildcard_certificate_match(name, conn->pghost))
+ else if (wildcard_certificate_match(name, host))
{
/* Matched wildcard name */
result = 1;
@@ -563,6 +564,7 @@ verify_peer_name_matches_certificate(PGconn *conn)
STACK_OF(GENERAL_NAME) *peer_san;
int i;
int rc;
+ char *host = PQhost(conn);
/*
* If told not to verify the peer name, don't do it. Return true
@@ -572,7 +574,7 @@ verify_peer_name_matches_certificate(PGconn *conn)
return true;
/* Check that we have a hostname to compare with. */
- if (!(conn->pghost && conn->pghost[0] != '\0'))
+ if (!(host && host[0] != '\0'))
{
printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("host name must be specified for a verified SSL connection\n"));
@@ -670,13 +672,13 @@ verify_peer_name_matches_certificate(PGconn *conn)
libpq_ngettext("server certificate for \"%s\" (and %d other name) does not match host name \"%s\"\n",
"server certificate for \"%s\" (and %d other names) does not match host name \"%s\"\n",
names_examined - 1),
- first_name, names_examined - 1, conn->pghost);
+ first_name, names_examined - 1, host);
}
else if (names_examined == 1)
{
printfPQExpBuffer(&conn->errorMessage,
libpq_gettext("server certificate for \"%s\" does not match host name \"%s\"\n"),
- first_name, conn->pghost);
+ first_name, host);
}
else
{
--
cgit v1.2.3
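An illustration of the connect_timeout behavior documented above, as a
libpq keyword/value connection string (the host names are hypothetical):

host=db1.example.com,db2.example.com port=5432 dbname=mydb connect_timeout=5

If both hosts are unreachable, each connection attempt gets its own
5-second timeout, so the call can block for roughly 10 seconds in total
before failing.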
From e1320266edd7df53c60af10b4c33ab2754278b3e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 22 Nov 2016 17:56:16 -0500
Subject: Doc: improve documentation about composite-value usage.
Create a section specifically for the syntactic rules around whole-row
variable usage, such as expansion of "foo.*". This was previously
documented only haphazardly, with some critical info buried in
unexpected places like xfunc-sql-composite-functions. Per repeated
questions in different mailing lists.
Discussion: <16288.1479610770@sss.pgh.pa.us>
---
doc/src/sgml/queries.sgml | 3 +-
doc/src/sgml/rowtypes.sgml | 214 +++++++++++++++++++++++++++++++++++++++++++--
doc/src/sgml/syntax.sgml | 16 ++--
doc/src/sgml/xfunc.sgml | 70 +++------------
4 files changed, 231 insertions(+), 72 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 548f835830..5cc6dbce11 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -1457,7 +1457,8 @@ SELECT tbl1.a, tbl2.a, tbl1.b FROM ...
SELECT tbl1.*, tbl2.a FROM ...
- (See also .)
+ See for more about
+ the table_name>.*> notation.
diff --git a/doc/src/sgml/rowtypes.sgml b/doc/src/sgml/rowtypes.sgml
index 605dc71dab..9d6768e006 100644
--- a/doc/src/sgml/rowtypes.sgml
+++ b/doc/src/sgml/rowtypes.sgml
@@ -19,7 +19,7 @@
column of a table can be declared to be of a composite type.
-
+ Declaration of Composite Types
@@ -90,7 +90,7 @@ CREATE TABLE inventory_item (
- Composite Value Input
+ Constructing Composite Valuescomposite type
@@ -101,8 +101,9 @@ CREATE TABLE inventory_item (
To write a composite value as a literal constant, enclose the field
values within parentheses and separate them by commas. You can put double
quotes around any field value, and must do so if it contains commas or
- parentheses. (More details appear below.) Thus, the general format of a
- composite constant is the following:
+ parentheses. (More details appear below.) Thus, the general format of
+ a composite constant is the following:
'( val1 , val2 , ... )'
@@ -129,7 +130,8 @@ CREATE TABLE inventory_item (
the generic type constants discussed in . The constant is initially
treated as a string and passed to the composite-type input conversion
- routine. An explicit type specification might be necessary.)
+ routine. An explicit type specification might be necessary to tell
+ which type to convert the constant to.)
@@ -143,7 +145,7 @@ ROW('fuzzy dice', 42, 1.99)
ROW('', 42, NULL)
The ROW keyword is actually optional as long as you have more than one
- field in the expression, so these can simplify to:
+ field in the expression, so these can be simplified to:
('fuzzy dice', 42, 1.99)
('', 42, NULL)
@@ -153,7 +155,7 @@ ROW('', 42, NULL)
-
+ Accessing Composite Types
@@ -198,6 +200,11 @@ SELECT (my_func(...)).field FROM ...
Without the extra parentheses, this will generate a syntax error.
+
+
+ The special field name *> means all fields>, as
+ further explained in .
+
@@ -243,6 +250,199 @@ INSERT INTO mytab (complex_col.r, complex_col.i) VALUES(1.1, 2.2);
+
+ Using Composite Types in Queries
+
+
+ There are various special syntax rules and behaviors associated with
+ composite types in queries. These rules provide useful shortcuts,
+ but can be confusing if you don't know the logic behind them.
+
+
+
+ In PostgreSQL>, a reference to a table name (or alias)
+ in a query is effectively a reference to the composite value of the
+ table's current row. For example, if we had a table
+ inventory_item> as shown
+ above, we could write:
+
+SELECT c FROM inventory_item c;
+
+ This query produces a single composite-valued column, so we might get
+ output like:
+
+ c
+------------------------
+ ("fuzzy dice",42,1.99)
+(1 row)
+
+ Note however that simple names are matched to column names before table
+ names, so this example works only because there is no column
+ named c> in the query's tables.
+
+
+
+ The ordinary qualified-column-name
+ syntax table_name>.>column_name>
+ can be understood as applying field
+ selection to the composite value of the table's current row.
+ (For efficiency reasons, it's not actually implemented that way.)
+
+
+
+ When we write
+
+SELECT c.* FROM inventory_item c;
+
+ then, according to the SQL standard, we should get the contents of the
+ table expanded into separate columns:
+
+ name | supplier_id | price
+------------+-------------+-------
+ fuzzy dice | 42 | 1.99
+(1 row)
+
+ as if the query were
+
+SELECT c.name, c.supplier_id, c.price FROM inventory_item c;
+
+ PostgreSQL> will apply this expansion behavior to
+ any composite-valued expression, although as shown above, you need to write parentheses
+ around the value that .*> is applied to whenever it's not a
+ simple table name. For example, if myfunc()> is a function
+ returning a composite type with columns a>,
+ b>, and c>, then these two queries have the
+ same result:
+
+SELECT (myfunc(x)).* FROM some_table;
+SELECT (myfunc(x)).a, (myfunc(x)).b, (myfunc(x)).c FROM some_table;
+
+
+
+
+
+ PostgreSQL> handles column expansion by
+ actually transforming the first form into the second. So, in this
+ example, myfunc()> would get invoked three times per row
+ with either syntax. If it's an expensive function you may wish to
+ avoid that, which you can do with a query like:
+
+SELECT (m).* FROM (SELECT myfunc(x) AS m FROM some_table OFFSET 0) ss;
+
+ The OFFSET 0> clause keeps the optimizer
+ from flattening> the sub-select to arrive at the form with
+ multiple calls of myfunc()>.
+
+
+
+
+ The composite_value>.*> syntax results in
+ column expansion of this kind when it appears at the top level of
+ a SELECT> output
+ list, a RETURNING>
+ list in INSERT>/UPDATE>/DELETE>,
+ a VALUES> clause, or
+ a row constructor.
+ In all other contexts (including when nested inside one of those
+ constructs), attaching .*> to a composite value does not
+ change the value, since it means all columns> and so the
+ same composite value is produced again. For example,
+ if somefunc()> accepts a composite-valued argument,
+ these queries are the same:
+
+
+SELECT somefunc(c.*) FROM inventory_item c;
+SELECT somefunc(c) FROM inventory_item c;
+
+
+ In both cases, the current row of inventory_item> is
+ passed to the function as a single composite-valued argument.
+ Even though .*> does nothing in such cases, using it is good
+ style, since it makes clear that a composite value is intended. In
+ particular, the parser will consider c> in c.*> to
+ refer to a table name or alias, not to a column name, so that there is
+ no ambiguity; whereas without .*>, it is not clear
+ whether c> means a table name or a column name, and in fact
+ the column-name interpretation will be preferred if there is a column
+ named c>.
+
+
+
+ Another example demonstrating these concepts is that all these queries
+ mean the same thing:
+
+SELECT * FROM inventory_item c ORDER BY c;
+SELECT * FROM inventory_item c ORDER BY c.*;
+SELECT * FROM inventory_item c ORDER BY ROW(c.*);
+
+ All of these ORDER BY> clauses specify the row's composite
+ value, resulting in sorting the rows according to the rules described
+ in . However,
+ if inventory_item> contained a column
+ named c>, the first case would be different from the
+ others, as it would mean to sort by that column only. Given the column
+ names previously shown, these queries are also equivalent to those above:
+
+SELECT * FROM inventory_item c ORDER BY ROW(c.name, c.supplier_id, c.price);
+SELECT * FROM inventory_item c ORDER BY (c.name, c.supplier_id, c.price);
+
+ (The last case uses a row constructor with the key word ROW>
+ omitted.)
+
+
+
+ Another special syntactical behavior associated with composite values is
+ that we can use functional notation> for extracting a field
+ of a composite value. The simple way to explain this is that
+ the notations field>(table>)>
+ and table>.field>>
+ are interchangeable. For example, these queries are equivalent:
+
+
+SELECT c.name FROM inventory_item c WHERE c.price > 1000;
+SELECT name(c) FROM inventory_item c WHERE price(c) > 1000;
+
+
+ Moreover, if we have a function that accepts a single argument of a
+ composite type, we can call it with either notation. These queries are
+ all equivalent:
+
+
+SELECT somefunc(c) FROM inventory_item c;
+SELECT somefunc(c.*) FROM inventory_item c;
+SELECT c.somefunc FROM inventory_item c;
+
+
+
+
+ This equivalence between functional notation and field notation
+ makes it possible to use functions on composite types to implement
+ computed fields>.
+
+ computed field
+
+
+ field
+ computed
+
+ An application using the last query above wouldn't need to be directly
+ aware that somefunc> isn't a real column of the table.
+
+
+
+
+ Because of this behavior, it's unwise to give a function that takes a
+ single composite-type argument the same name as any of the fields of
+ that composite type. If there is ambiguity, the field-name
+ interpretation will be preferred, so that such a function could not be
+ called without tricks. One way to force the function interpretation is
+ to schema-qualify the function name, that is, write
+ schema>.func>(compositevalue>).
+
+
+
+
Composite Type Input and Output Syntax
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index 36df6c6b1b..000da39250 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1449,12 +1449,13 @@ $1.somecolumn
- In a select list (see ), you
- can ask for all fields of a composite value by
+ You can ask for all fields of a composite value by
writing .*:
(compositecol).*
+ This notation behaves differently depending on context;
+ see for details.
@@ -1531,7 +1532,7 @@ sqrt(2)
interchangeable. This behavior is not SQL-standard but is provided
in PostgreSQL> because it allows use of functions to
emulate computed fields>. For more information see
- .
+ .
@@ -2291,7 +2292,8 @@ SELECT ROW(1,2.5,'this is a test');
rowvalue.*,
which will be expanded to a list of the elements of the row value,
just as occurs when the .*> syntax is used at the top level
- of a SELECT> list. For example, if table t> has
+ of a SELECT> list (see ).
+ For example, if table t> has
columns f1> and f2>, these are the same:
SELECT ROW(t.*, 42) FROM t;
@@ -2302,9 +2304,9 @@ SELECT ROW(t.f1, t.f2, 42) FROM t;
Before PostgreSQL 8.2, the
- .* syntax was not expanded, so that writing
- ROW(t.*, 42)> created a two-field row whose first field
- was another row value. The new behavior is usually more useful.
+ .* syntax was not expanded in row constructors, so
+ that writing ROW(t.*, 42)> created a two-field row whose first
+ field was another row value. The new behavior is usually more useful.
If you need the old behavior of nested row values, write the inner
row value without .*, for instance
ROW(t, 42)>.
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 783702a637..f2f379870f 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -394,8 +394,8 @@ SELECT name, double_salary(emp.*) AS dream
Notice the use of the syntax $1.salary
to select one field of the argument row value. Also notice
- how the calling SELECT> command uses *>
- to select
+ how the calling SELECT> command
+ uses table_name>.*> to select
the entire current row of a table as a composite value. The table
row can alternatively be referenced using just the table name,
like this:
@@ -405,6 +405,8 @@ SELECT name, double_salary(emp) AS dream
WHERE emp.cubicle ~= point '(2,1)';
but this usage is deprecated since it's easy to get confused.
+ (See for details about these
+ two notations for the composite value of a table row.)
@@ -479,7 +481,8 @@ $$ LANGUAGE SQL;
- We could call this function directly in either of two ways:
+ We could call this function directly either by using it in
+ a value expression:
SELECT new_emp();
@@ -487,7 +490,11 @@ SELECT new_emp();
new_emp
--------------------------
(None,1000.0,25,"(2,2)")
+
+
+ or by calling it as a table function:
+
SELECT * FROM new_emp();
name | salary | age | cubicle
@@ -524,12 +531,7 @@ LINE 1: SELECT new_emp().name;
- Another option is to use
- functional notation for extracting an attribute. The simple way
- to explain this is that we can use the
- notations attribute>(table>)>
- and table>.attribute>>
- interchangeably.
+ Another option is to use functional notation for extracting an attribute:
SELECT name(new_emp());
@@ -539,50 +541,10 @@ SELECT name(new_emp());
None
-
--- This is the same as:
--- SELECT emp.name AS youngster FROM emp WHERE emp.age < 30;
-
-SELECT name(emp) AS youngster FROM emp WHERE age(emp) < 30;
-
- youngster
------------
- Sam
- Andy
-
+ As explained in , the field notation and
+ functional notation are equivalent.
-
-
- The equivalence between functional notation and attribute notation
- makes it possible to use functions on composite types to emulate
- computed fields>.
-
- computed field
-
-
- field
- computed
-
- For example, using the previous definition
- for double_salary(emp)>, we can write
-
-
-SELECT emp.name, emp.double_salary FROM emp;
-
-
- An application using this wouldn't need to be directly aware that
- double_salary> isn't a real column of the table.
- (You can also emulate computed fields with views.)
-
-
-
- Because of this behavior, it's unwise to give a function that takes
- a single composite-type argument the same name as any of the fields of
- that composite type.
-
-
-
Another way to use a function returning a composite type is to pass the
result to another function that accepts the correct row type as input:
@@ -599,12 +561,6 @@ SELECT getname(new_emp());
(1 row)
-
-
- Still another way to use a function that returns a composite type is to
- call it as a table function, as described in .
-
--
cgit v1.2.3
From dbdfd114f34443f1e4ad16ce2721f9817d3b3d80 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 25 Nov 2016 18:36:10 -0500
Subject: Bring some clarity to the defaults for the xxx_flush_after
parameters.
Instead of confusingly stating platform-dependent defaults for these
parameters in the comments in postgresql.conf.sample (with the main
entry being a lie on Linux), teach initdb to install the correct
platform-dependent value in postgresql.conf, similarly to the way
we handle other platform-dependent defaults. This won't do anything
for existing 9.6 installations, but since it's effectively only a
documentation improvement, that seems OK.
Since this requires initdb to have access to the default values,
move the #define's for those to pg_config_manual.h; the original
placement in bufmgr.h is unworkable because that file can't be
included by frontend programs.
Adjust the default value for wal_writer_flush_after so that it is 1MB
regardless of XLOG_BLCKSZ, conforming to what is stated in both the
SGML docs and postgresql.conf. (We could alternatively make it scale
with XLOG_BLCKSZ, but I'm not sure I see the point.)
Copy-edit related SGML documentation.
Fabien Coelho and Tom Lane, per a gripe from Tomas Vondra.
Discussion: <30ebc6e3-8358-09cf-44a8-578252938424@2ndquadrant.com>
---
doc/src/sgml/config.sgml | 33 ++++++++++++++-------------
src/backend/access/transam/xlog.c | 2 +-
src/backend/utils/misc/guc.c | 8 +++----
src/backend/utils/misc/postgresql.conf.sample | 10 ++++----
src/bin/initdb/initdb.c | 25 ++++++++++++++++++--
src/include/pg_config_manual.h | 18 +++++++++++++++
src/include/storage/bufmgr.h | 11 ---------
7 files changed, 66 insertions(+), 41 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index adab2f8378..dcd06634fe 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1903,10 +1903,10 @@ include_dir 'conf.d'
, but smaller than the OS's page
cache, where performance might degrade. This setting may have no
effect on some platforms. The valid range is between
- 0, which disables controlled writeback, and
+ 0, which disables forced writeback, and
2MB. The default is 512kB> on Linux,
- 0> elsewhere. (Non-default values of
- BLCKSZ change the default and maximum.)
+ 0> elsewhere. (If BLCKSZ is not 8kB,
+ the default and maximum values scale proportionally to it.)
This parameter can only be set in the postgresql.conf>
file or on the server command line.
@@ -2055,10 +2055,10 @@ include_dir 'conf.d'
that are bigger than , but smaller
than the OS's page cache, where performance might degrade. This
setting may have no effect on some platforms. The valid range is
- between 0, which disables controlled writeback,
- and 2MB. The default is 0> (i.e. no
- flush control). (Non-default values of BLCKSZ
- change the maximum.)
+ between 0, which disables forced writeback,
+ and 2MB. The default is 0>, i.e., no
+ forced writeback. (If BLCKSZ is not 8kB,
+ the maximum value scales proportionally to it.)
@@ -2518,10 +2518,11 @@ include_dir 'conf.d'
Specifies how often the WAL writer flushes WAL. After flushing WAL it
sleeps for wal_writer_delay> milliseconds, unless woken up
- by an asynchronously committing transaction. In case the last flush
+ by an asynchronously committing transaction. If the last flush
happened less than wal_writer_delay> milliseconds ago and
less than wal_writer_flush_after> bytes of WAL have been
- produced since, WAL is only written to the OS, not flushed to disk.
+ produced since, then WAL is only written to the operating system, not
+ flushed to disk.
The default value is 200 milliseconds (200ms>). Note that
on many systems, the effective resolution of sleep delays is 10
milliseconds; setting wal_writer_delay> to a value that is
@@ -2540,12 +2541,12 @@ include_dir 'conf.d'
- Specifies how often the WAL writer flushes WAL. In case the last flush
+ Specifies how often the WAL writer flushes WAL. If the last flush
happened less than wal_writer_delay> milliseconds ago and
less than wal_writer_flush_after> bytes of WAL have been
- produced since, WAL is only written to the OS, not flushed to disk.
- If wal_writer_flush_after> is set to 0> WAL is
- flushed every time the WAL writer has written WAL. The default is
+ produced since, then WAL is only written to the operating system, not
+ flushed to disk. If wal_writer_flush_after> is set
+ to 0> then WAL data is flushed immediately. The default is
1MB. This parameter can only be set in the
postgresql.conf> file or on the server command line.
@@ -2665,10 +2666,10 @@ include_dir 'conf.d'
that are bigger than , but smaller
than the OS's page cache, where performance might degrade. This
setting may have no effect on some platforms. The valid range is
- between 0, which disables controlled writeback,
+ between 0, which disables forced writeback,
and 2MB. The default is 256kB> on
- Linux, 0> elsewhere. (Non-default values of
- BLCKSZ change the default and maximum.)
+ Linux, 0> elsewhere. (If BLCKSZ is not
+ 8kB, the default and maximum values scale proportionally to it.)
This parameter can only be set in the postgresql.conf>
file or on the server command line.
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ce4f1fc298..084401d2f2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2759,7 +2759,7 @@ XLogFlush(XLogRecPtr record)
* This routine is invoked periodically by the background walwriter process.
*
* Returns TRUE if there was any work to do, even if we skipped flushing due
- * to wal_writer_delay/wal_flush_after.
+ * to wal_writer_delay/wal_writer_flush_after.
*/
bool
XLogBackgroundFlush(void)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index da74f00ab2..28ebcb6f3f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2281,7 +2281,6 @@ static struct config_int ConfigureNamesInt[] =
GUC_UNIT_BLOCKS
},
&checkpoint_flush_after,
- /* see bufmgr.h: OS dependent default */
DEFAULT_CHECKPOINT_FLUSH_AFTER, 0, WRITEBACK_MAX_PENDING_FLUSHES,
NULL, NULL, NULL
},
@@ -2310,12 +2309,12 @@ static struct config_int ConfigureNamesInt[] =
{
{"wal_writer_flush_after", PGC_SIGHUP, WAL_SETTINGS,
- gettext_noop("Amount of WAL written out by WAL writer triggering a flush."),
+ gettext_noop("Amount of WAL written out by WAL writer that triggers a flush."),
NULL,
GUC_UNIT_XBLOCKS
},
&WalWriterFlushAfter,
- 128, 0, INT_MAX,
+ (1024*1024) / XLOG_BLCKSZ, 0, INT_MAX,
NULL, NULL, NULL
},
@@ -2439,7 +2438,6 @@ static struct config_int ConfigureNamesInt[] =
GUC_UNIT_BLOCKS
},
&bgwriter_flush_after,
- /* see bufmgr.h: OS dependent default */
DEFAULT_BGWRITER_FLUSH_AFTER, 0, WRITEBACK_MAX_PENDING_FLUSHES,
NULL, NULL, NULL
},
@@ -2467,7 +2465,7 @@ static struct config_int ConfigureNamesInt[] =
GUC_UNIT_BLOCKS
},
&backend_flush_after,
- 0, 0, WRITEBACK_MAX_PENDING_FLUSHES,
+ DEFAULT_BACKEND_FLUSH_AFTER, 0, WRITEBACK_MAX_PENDING_FLUSHES,
NULL, NULL, NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 7c2daa54bc..0df15380a9 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -156,8 +156,7 @@
#bgwriter_delay = 200ms # 10-10000ms between rounds
#bgwriter_lru_maxpages = 100 # 0-1000 max buffers written/round
#bgwriter_lru_multiplier = 2.0 # 0-10.0 multiplier on buffers scanned/round
-#bgwriter_flush_after = 0 # 0 disables,
- # default is 512kB on linux, 0 otherwise
+#bgwriter_flush_after = 0 # measured in pages, 0 disables
# - Asynchronous Behavior -
@@ -166,7 +165,7 @@
#max_parallel_workers_per_gather = 2 # taken from max_worker_processes
#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
# (change requires restart)
-#backend_flush_after = 0 # 0 disables, default is 0
+#backend_flush_after = 0 # measured in pages, 0 disables
#------------------------------------------------------------------------------
@@ -196,7 +195,7 @@
#wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers
# (change requires restart)
#wal_writer_delay = 200ms # 1-10000 milliseconds
-#wal_writer_flush_after = 1MB # 0 disables
+#wal_writer_flush_after = 1MB # measured in pages, 0 disables
#commit_delay = 0 # range 0-100000, in microseconds
#commit_siblings = 5 # range 1-1000
@@ -207,8 +206,7 @@
#max_wal_size = 1GB
#min_wal_size = 80MB
#checkpoint_completion_target = 0.5 # checkpoint target duration, 0.0 - 1.0
-#checkpoint_flush_after = 0 # 0 disables,
- # default is 256kB on linux, 0 otherwise
+#checkpoint_flush_after = 0 # measured in pages, 0 disables
#checkpoint_warning = 30s # 0 disables
# - Archiving -
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index c8a8c52c3d..24f9cc8eae 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -64,11 +64,11 @@
#include "common/file_utils.h"
#include "common/restricted_token.h"
#include "common/username.h"
-#include "mb/pg_wchar.h"
+#include "fe_utils/string_utils.h"
#include "getaddrinfo.h"
#include "getopt_long.h"
+#include "mb/pg_wchar.h"
#include "miscadmin.h"
-#include "fe_utils/string_utils.h"
/* Ideally this would be in a .h file, but it hardly seems worth the trouble */
@@ -1095,6 +1095,27 @@ setup_config(void)
conflines = replace_token(conflines, "#dynamic_shared_memory_type = posix",
repltok);
+#if DEFAULT_BACKEND_FLUSH_AFTER > 0
+ snprintf(repltok, sizeof(repltok), "#backend_flush_after = %dkB",
+ DEFAULT_BACKEND_FLUSH_AFTER * (BLCKSZ / 1024));
+ conflines = replace_token(conflines, "#backend_flush_after = 0",
+ repltok);
+#endif
+
+#if DEFAULT_BGWRITER_FLUSH_AFTER > 0
+ snprintf(repltok, sizeof(repltok), "#bgwriter_flush_after = %dkB",
+ DEFAULT_BGWRITER_FLUSH_AFTER * (BLCKSZ / 1024));
+ conflines = replace_token(conflines, "#bgwriter_flush_after = 0",
+ repltok);
+#endif
+
+#if DEFAULT_CHECKPOINT_FLUSH_AFTER > 0
+ snprintf(repltok, sizeof(repltok), "#checkpoint_flush_after = %dkB",
+ DEFAULT_CHECKPOINT_FLUSH_AFTER * (BLCKSZ / 1024));
+ conflines = replace_token(conflines, "#checkpoint_flush_after = 0",
+ repltok);
+#endif
+
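For illustration (assuming a Linux build with the default 8kB BLCKSZ, so the
64- and 32-block defaults above work out to 512kB and 256kB), the generated
postgresql.conf would then carry lines like:

    #bgwriter_flush_after = 512kB		# measured in pages, 0 disables
    #checkpoint_flush_after = 256kB		# measured in pages, 0 disables

backend_flush_after defaults to 0 on every platform, so its line is left
untouched.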
#ifndef USE_PREFETCH
conflines = replace_token(conflines,
"#effective_io_concurrency = 1",
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index a2b2b614be..96885bb990 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -147,6 +147,24 @@
#define USE_PREFETCH
#endif
+/*
+ * Default and maximum values for backend_flush_after, bgwriter_flush_after
+ * and checkpoint_flush_after; measured in blocks. Currently, these are
+ * enabled by default if sync_file_range() exists, ie, only on Linux. Perhaps
+ * we could also enable by default if we have mmap and msync(MS_ASYNC)?
+ */
+#ifdef HAVE_SYNC_FILE_RANGE
+#define DEFAULT_BACKEND_FLUSH_AFTER 0 /* never enabled by default */
+#define DEFAULT_BGWRITER_FLUSH_AFTER 64
+#define DEFAULT_CHECKPOINT_FLUSH_AFTER 32
+#else
+#define DEFAULT_BACKEND_FLUSH_AFTER 0
+#define DEFAULT_BGWRITER_FLUSH_AFTER 0
+#define DEFAULT_CHECKPOINT_FLUSH_AFTER 0
+#endif
+/* upper limit for all three variables */
+#define WRITEBACK_MAX_PENDING_FLUSHES 256
+
/*
* USE_SSL code should be compiled only when compiling with an SSL
* implementation. (Currently, only OpenSSL is supported, but we might add
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 821bee5ece..c543ad6fde 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -54,17 +54,6 @@ struct WritebackContext;
extern PGDLLIMPORT int NBuffers;
/* in bufmgr.c */
-#define WRITEBACK_MAX_PENDING_FLUSHES 256
-
-/* FIXME: Also default to on for mmap && msync(MS_ASYNC)? */
-#ifdef HAVE_SYNC_FILE_RANGE
-#define DEFAULT_CHECKPOINT_FLUSH_AFTER 32
-#define DEFAULT_BGWRITER_FLUSH_AFTER 64
-#else
-#define DEFAULT_CHECKPOINT_FLUSH_AFTER 0
-#define DEFAULT_BGWRITER_FLUSH_AFTER 0
-#endif /* HAVE_SYNC_FILE_RANGE */
-
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
--
cgit v1.2.3
From c6dbc7b65175f791b8e63a1970ee6b6f68ebec09 Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Sun, 27 Nov 2016 17:10:02 +0100
Subject: Mention server start requirement for ssl parameters
Fix the documentation for three SSL-related parameters, which did not
specify that they can only be changed at server start.
Michael Paquier
---
doc/src/sgml/config.sgml | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index dcd06634fe..d8d207e5eb 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1048,7 +1048,8 @@ include_dir 'conf.d'
in the OpenSSL> package for the syntax of this setting
and a list of supported values. The default value is
HIGH:MEDIUM:+3DES:!aNULL>. It is usually reasonable,
- unless you have specific security requirements.
+ unless you have specific security requirements. This parameter can only
+ be set at server start.
@@ -1120,7 +1121,8 @@ include_dir 'conf.d'
Specifies whether to use the server's SSL cipher preferences, rather
- than the client's. The default is true.
+ than the client's. The default is true. This parameter can only be
+ set at server start.
@@ -1144,7 +1146,8 @@ include_dir 'conf.d'
Specifies the name of the curve to use in ECDH> key
exchange. It needs to be supported by all clients that connect.
 It does not need to be the same curve as used by the server's Elliptic
- Curve key. The default is prime256v1>.
+ Curve key. The default is prime256v1>. This parameter
+ can only be set at server start.
--
cgit v1.2.3
From 489a51af12de58e336c7b97e793aed6d11bb13b5 Mon Sep 17 00:00:00 2001
From: Stephen Frost
Date: Tue, 29 Nov 2016 09:03:11 -0500
Subject: Correct psql documentation example
An example in the psql documentation showed a field name different from
what the command actually produced.
Pointed out by Fabien COELHO
Back-patch to 9.6 where the example was added.
Discussion: https://postgr.es/m/alpine.DEB.2.20.1611291349400.19314@lancre
---
doc/src/sgml/ref/psql-ref.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 2410bee4eb..261652a45f 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -4172,7 +4172,7 @@ second | four
with the \crosstabview command:
testdb=> SELECT first, second, first > 2 AS gt2 FROM my_table;
- first | second | ge2
+ first | second | gt2
-------+--------+-----
1 | one | f
2 | two | f
--
cgit v1.2.3
From b57c8333b55e315927ca65016f1dbc165ef30df9 Mon Sep 17 00:00:00 2001
From: Stephen Frost
Date: Tue, 29 Nov 2016 10:35:04 -0500
Subject: Clarify pg_dump -b documentation
The documentation around the -b/--blobs option to pg_dump seemed to
imply that it might be possible to add blobs to a "schema-only" dump or
similar. Clarify that blobs are data and therefore will only be
included in dumps where data is being included, even when -b is used to
request blobs be included.
The -b option has been around since before 9.2, so back-patch to all
supported branches.
Discussion: https://postgr.es/m/20161119173316.GA13284@tamriel.snowman.net
---
doc/src/sgml/ref/pg_dump.sgml | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 371a61427d..04b312445d 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -138,8 +138,11 @@ PostgreSQL documentation
Include large objects in the dump. This is the default behavior
except when
--schema>,
--table>, or
-
--schema-only> is specified, so the
-b>
- switch is only useful to add large objects to selective dumps.
+
--schema-only> is specified. The
-b>
+ switch is therefore only useful to add large objects to dumps
+ where a specific schema or table has been requested. Note that
+ blobs are considered data and therefore will be included when
+ --data-only is used, but not when --schema-only is.
--
cgit v1.2.3
From 4fafa579b0ab411f4cd2f30c57205d9b0ac27340 Mon Sep 17 00:00:00 2001
From: Stephen Frost
Date: Tue, 29 Nov 2016 11:09:35 -0500
Subject: Add --no-blobs option to pg_dump
Add an option to exclude blobs when running pg_dump. By default, blobs
are included but this option can be used to exclude them while keeping
the rest of the dump.
Comment updates and regression tests from me.
Author: Guillaume Lelarge
Reviewed-by: Amul Sul
Discussion: https://postgr.es/m/VisenaEmail.48.49926ea6f91dceb6.15355a48249@tc7-visena
---
doc/src/sgml/ref/pg_dump.sgml | 16 ++++++++
src/bin/pg_dump/pg_backup.h | 1 +
src/bin/pg_dump/pg_dump.c | 19 +++++++--
src/bin/pg_dump/t/002_pg_dump.pl | 86 +++++++++++++++++++++++++++++++++++++++-
4 files changed, 117 insertions(+), 5 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index 04b312445d..f6225d23c8 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -147,6 +147,22 @@ PostgreSQL documentation
+
+
-B>
+
--no-blobs>
+
+
+ Exclude large objects from the dump.
+
+
+
+ When both
-b> and
-B> are given, the behavior
+ is to output large objects when data is being dumped; see the
+
-b> documentation.
+
+
+
+
-c
--clean
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index cfdfae5e12..7241cdfc44 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -159,6 +159,7 @@ typedef struct _dumpOptions
int outputClean;
int outputCreateDB;
bool outputBlobs;
+ bool dontOutputBlobs;
int outputNoOwner;
char *outputSuperuser;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 9f59f53a7d..2ff60b9879 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -291,6 +291,7 @@ main(int argc, char **argv)
static struct option long_options[] = {
{"data-only", no_argument, NULL, 'a'},
{"blobs", no_argument, NULL, 'b'},
+ {"no-blobs", no_argument, NULL, 'B'},
{"clean", no_argument, NULL, 'c'},
{"create", no_argument, NULL, 'C'},
{"dbname", required_argument, NULL, 'd'},
@@ -379,7 +380,7 @@ main(int argc, char **argv)
InitDumpOptions(&dopt);
- while ((c = getopt_long(argc, argv, "abcCd:E:f:F:h:j:n:N:oOp:RsS:t:T:U:vwWxZ:",
+ while ((c = getopt_long(argc, argv, "abBcCd:E:f:F:h:j:n:N:oOp:RsS:t:T:U:vwWxZ:",
long_options, &optindex)) != -1)
{
switch (c)
@@ -392,6 +393,10 @@ main(int argc, char **argv)
dopt.outputBlobs = true;
break;
+ case 'B': /* Don't dump blobs */
+ dopt.dontOutputBlobs = true;
+ break;
+
case 'c': /* clean (i.e., drop) schema prior to create */
dopt.outputClean = 1;
break;
@@ -713,10 +718,15 @@ main(int argc, char **argv)
/* non-matching exclusion patterns aren't an error */
/*
- * Dumping blobs is now default unless we saw an inclusion switch or -s
- * ... but even if we did see one of these, -b turns it back on.
+ * Dumping blobs is the default for dumps where an inclusion switch is not
+ * used (an "include everything" dump). -B can be used to exclude blobs
+ * from those dumps. -b can be used to include blobs even when an
+ * inclusion switch is used.
+ *
+ * -s means "schema only" and blobs are data, not schema, so we never
+ * include blobs when -s is used.
*/
- if (dopt.include_everything && !dopt.schemaOnly)
+ if (dopt.include_everything && !dopt.schemaOnly && !dopt.dontOutputBlobs)
dopt.outputBlobs = true;
/*
@@ -876,6 +886,7 @@ help(const char *progname)
printf(_("\nOptions controlling the output content:\n"));
printf(_(" -a, --data-only dump only the data, not the schema\n"));
printf(_(" -b, --blobs include large objects in dump\n"));
+ printf(_(" -B, --no-blobs exclude large objects in dump\n"));
printf(_(" -c, --clean clean (drop) database objects before recreating\n"));
printf(_(" -C, --create include commands to create database in dump\n"));
printf(_(" -E, --encoding=ENCODING dump the data in encoding ENCODING\n"));
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 35ee85325b..7379487471 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -144,6 +144,9 @@ my %pgdump_runs = (
pg_dumpall_dbprivs => {
dump_cmd =>
[ 'pg_dumpall', '-f', "$tempdir/pg_dumpall_dbprivs.sql", ], },
+ no_blobs => {
+ dump_cmd =>
+ [ 'pg_dump', '-f', "$tempdir/no_blobs.sql", '-B', 'postgres', ], },
no_privs => {
dump_cmd =>
[ 'pg_dump', '-f', "$tempdir/no_privs.sql", '-x', 'postgres', ], },
@@ -184,7 +187,7 @@ my %pgdump_runs = (
test_schema_plus_blobs => {
dump_cmd => [
'pg_dump', '-f', "$tempdir/test_schema_plus_blobs.sql",
- '-n', 'dump_test', '-b', 'postgres', ], },);
+ '-n', 'dump_test', '-b', '-B', 'postgres', ], },);
###############################################################
# Definition of the tests to run.
@@ -243,6 +246,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
only_dump_test_schema => 1,
pg_dumpall_dbprivs => 1,
@@ -283,6 +287,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -309,6 +314,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
only_dump_test_schema => 1,
pg_dumpall_dbprivs => 1,
@@ -339,6 +345,7 @@ my %tests = (
test_schema_plus_blobs => 1, },
unlike => {
binary_upgrade => 1,
+ no_blobs => 1,
no_owner => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
@@ -362,6 +369,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
@@ -384,6 +392,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
only_dump_test_schema => 1,
pg_dumpall_dbprivs => 1,
@@ -408,6 +417,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
pg_dumpall_dbprivs => 1,
role => 1,
@@ -430,6 +440,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -463,6 +474,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -494,6 +506,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -520,6 +533,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
@@ -546,6 +560,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -572,6 +587,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
only_dump_test_schema => 1,
pg_dumpall_dbprivs => 1,
@@ -596,6 +612,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
pg_dumpall_dbprivs => 1,
role => 1,
@@ -655,6 +672,7 @@ my %tests = (
},
unlike => {
binary_upgrade => 1,
+ no_blobs => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
pg_dumpall_globals => 1,
@@ -691,6 +709,7 @@ my %tests = (
},
unlike => {
binary_upgrade => 1,
+ no_blobs => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
pg_dumpall_globals => 1,
@@ -715,6 +734,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -737,6 +757,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -762,6 +783,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -790,6 +812,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -819,6 +842,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -860,6 +884,7 @@ my %tests = (
createdb => 1,
data_only => 1,
defaults => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -891,6 +916,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -936,6 +962,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -967,6 +994,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1032,6 +1060,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1067,6 +1096,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1094,6 +1124,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1127,6 +1158,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1153,6 +1185,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1196,6 +1229,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1236,6 +1270,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1275,6 +1310,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1313,6 +1349,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1351,6 +1388,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1389,6 +1427,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1425,6 +1464,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1462,6 +1502,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1502,6 +1543,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1536,6 +1578,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1567,6 +1610,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1604,6 +1648,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1641,6 +1686,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1687,6 +1733,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1726,6 +1773,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1758,6 +1806,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1789,6 +1838,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1823,6 +1873,7 @@ my %tests = (
# exclude_dump_test_schema => 1,
# exclude_test_table => 1,
# exclude_test_table_data => 1,
+# no_blobs => 1,
# no_privs => 1,
# no_owner => 1,
# pg_dumpall_dbprivs => 1,
@@ -1856,6 +1907,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -1891,6 +1943,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1927,6 +1980,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1960,6 +2014,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -1993,6 +2048,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2026,6 +2082,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2059,6 +2116,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2092,6 +2150,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2122,6 +2181,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2152,6 +2212,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -2187,6 +2248,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2223,6 +2285,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2260,6 +2323,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2296,6 +2360,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -2329,6 +2394,7 @@ my %tests = (
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2364,6 +2430,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -2397,6 +2464,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
@@ -2577,6 +2645,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2608,6 +2677,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
role => 1,
@@ -2638,6 +2708,7 @@ my %tests = (
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
@@ -2662,6 +2733,7 @@ my %tests = (
createdb => 1,
defaults => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
@@ -2693,6 +2765,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
role => 1,
@@ -2723,6 +2796,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
role => 1,
@@ -2752,6 +2826,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
only_dump_test_schema => 1,
pg_dumpall_dbprivs => 1,
@@ -2782,6 +2857,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
@@ -2871,6 +2947,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
@@ -2904,6 +2981,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2937,6 +3015,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
defaults => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_privs => 1,
no_owner => 1,
only_dump_test_schema => 1,
@@ -2976,6 +3055,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
only_dump_test_schema => 1,
only_dump_test_table => 1,
@@ -3006,6 +3086,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
@@ -3032,6 +3113,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
@@ -3061,6 +3143,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
@@ -3087,6 +3170,7 @@ qr/^GRANT SELECT ON TABLE test_third_table TO regress_dump_test_role;/m,
exclude_dump_test_schema => 1,
exclude_test_table => 1,
exclude_test_table_data => 1,
+ no_blobs => 1,
no_owner => 1,
pg_dumpall_dbprivs => 1,
schema_only => 1,
--
cgit v1.2.3
From 721f7bd3cbccaf8c07cad2707826b83f84694832 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 29 Nov 2016 12:18:31 -0500
Subject: libpq: Add target_session_attrs parameter.
Commit 274bb2b3857cc987cfa21d14775cae9b0dababa5 made it possible to
specify multiple IPs in a connection string, but that's not good
enough for the case where you have a read-write master and a bunch of
read-only standbys and want to connect to whichever server is the
master at the current time. This commit allows that, by making it
possible to specify target_session_attrs=read-write as a connection
parameter.
There was extensive discussion of the best name for the connection
parameter and its values as well as the best way to distinguish master
and standbys. For now, adopt the same solution as JDBC: if the user
wants a read-write connection, issue 'show transaction_read_only' and
reject the connection if the result is 'on'. In the future, we
could add additional values of this new target_session_attrs parameter
that issue different queries; or we might have some way of
distinguishing the server type without resorting to an SQL query; but
right now, we have this, and that's (hopefully) a good start.
Victor Wagner and Mithun Cy. Design review by Álvaro Herrera, Catalin
Iacob, Takayuki Tsunakawa, and Craig Ringer; code review by me. I
changed Mithun's patch to skip all remaining IPs for a host if we
reject a connection based on this new parameter, rewrote the
documentation, and did some other cosmetic cleanup.
Discussion: http://postgr.es/m/CAD__OuhqPRGpcsfwPHz_PDqAGkoqS1UvnUnOnAB-LBWBW=wu4A@mail.gmail.com
---
doc/src/sgml/libpq.sgml | 29 ++++-
src/interfaces/libpq/fe-connect.c | 242 ++++++++++++++++++++++++++++++--------
src/interfaces/libpq/libpq-fe.h | 4 +-
src/interfaces/libpq/libpq-int.h | 3 +
4 files changed, 227 insertions(+), 51 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 0f375bf5f2..2620eec033 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -811,7 +811,7 @@ postgresql://localhost/mydb
postgresql://user@localhost
postgresql://user:secret@localhost
postgresql://other@localhost/otherdb?connect_timeout=10&application_name=myapp
-postgresql://host1:123,host2:456/somedb
+postgresql://host1:123,host2:456/somedb?target_session_attrs=any&application_name=myapp
Components of the hierarchical part of the URI can also
be given as parameters. For example:
@@ -1386,6 +1386,23 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
+
+
+ target_session_attrs
+
+
+ If this parameter is set to read-write, only a
+ connection in which read-write transactions are accepted by default
+ is considered acceptable. The query
+ show transaction_read_only will be sent upon any
+ successful connection; if it returns on>, the connection
+ will be closed. If multiple hosts were specified in the connection
+ string, any remaining servers will be tried just as if the connection
+ attempt had failed. The default value of this parameter,
+ any>, regards all connections as acceptable.
+
+
+
@@ -7069,6 +7086,16 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
linkend="libpq-connect-client-encoding"> connection parameter.
+
+
+
+
+ PGTARGETSESSIONATTRS
+
+ PGTARGETSESSIONATTRS behaves the same as the connection parameter.
+
+
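As a minimal sketch of how a client would use the new parameter (assuming a
libpq built with this patch and two hypothetical hosts, host1 and host2):

    #include <stdio.h>
    #include <libpq-fe.h>

    int
    main(void)
    {
        /* libpq tries each host in turn until one accepts read-write sessions */
        PGconn *conn = PQconnectdb("host=host1,host2 port=5432 dbname=somedb "
                                   "target_session_attrs=read-write");

        if (PQstatus(conn) != CONNECTION_OK)
        {
            fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
            PQfinish(conn);
            return 1;
        }
        printf("connected to a read-write server\n");
        PQfinish(conn);
        return 0;
    }

If every host reports transaction_read_only = on, the connection attempt
fails and PQstatus() returns CONNECTION_BAD.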
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 3e9c45bc40..cd96ddb2f0 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -108,6 +108,7 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
#define DefaultOption ""
#define DefaultAuthtype ""
#define DefaultPassword ""
+#define DefaultTargetSessionAttrs "any"
#ifdef USE_SSL
#define DefaultSSLMode "prefer"
#else
@@ -300,6 +301,11 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
"Replication", "D", 5,
offsetof(struct pg_conn, replication)},
+ {"target_session_attrs", "PGTARGETSESSIONATTRS",
+ DefaultTargetSessionAttrs, NULL,
+ "Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+ offsetof(struct pg_conn, target_session_attrs)},
+
/* Terminating entry --- MUST BE LAST */
{NULL, NULL, NULL, NULL,
NULL, NULL, 0}
@@ -336,6 +342,8 @@ static PGconn *makeEmptyPGconn(void);
static bool fillPGconn(PGconn *conn, PQconninfoOption *connOptions);
static void freePGconn(PGconn *conn);
static void closePGconn(PGconn *conn);
+static void release_all_addrinfo(PGconn *conn);
+static void sendTerminateConn(PGconn *conn);
static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage);
static PQconninfoOption *parse_connection_string(const char *conninfo,
PQExpBuffer errorMessage, bool use_defaults);
@@ -1025,6 +1033,22 @@ connectOptions2(PGconn *conn)
goto oom_error;
}
+ /*
+ * Validate target_session_attrs option.
+ */
+ if (conn->target_session_attrs)
+ {
+ if (strcmp(conn->target_session_attrs, "any") != 0
+ && strcmp(conn->target_session_attrs, "read-write") != 0)
+ {
+ conn->status = CONNECTION_BAD;
+ printfPQExpBuffer(&conn->errorMessage,
+ libpq_gettext("invalid target_session_attrs value: \"%s\"\n"),
+ conn->target_session_attrs);
+ return false;
+ }
+ }
+
/*
* Only if we get this far is it appropriate to try to connect. (We need a
* state flag, rather than just the boolean result of this function, in
@@ -1814,6 +1838,7 @@ PQconnectPoll(PGconn *conn)
/* Special cases: proceed without waiting. */
case CONNECTION_SSL_STARTUP:
case CONNECTION_NEEDED:
+ case CONNECTION_CHECK_WRITABLE:
break;
default:
@@ -2752,27 +2777,6 @@ keep_going: /* We will come back to here until there is
goto error_return;
}
- /* We can release the address lists now. */
- if (conn->connhost != NULL)
- {
- int i;
-
- for (i = 0; i < conn->nconnhost; ++i)
- {
- int family = AF_UNSPEC;
-
-#ifdef HAVE_UNIX_SOCKETS
- if (conn->connhost[i].type == CHT_UNIX_SOCKET)
- family = AF_UNIX;
-#endif
-
- pg_freeaddrinfo_all(family,
- conn->connhost[i].addrlist);
- conn->connhost[i].addrlist = NULL;
- }
- }
- conn->addr_cur = NULL;
-
/* Fire up post-connection housekeeping if needed */
if (PG_PROTOCOL_MAJOR(conn->pversion) < 3)
{
@@ -2782,7 +2786,24 @@ keep_going: /* We will come back to here until there is
return PGRES_POLLING_WRITING;
}
- /* Otherwise, we are open for business! */
+ /*
+ * If a read-write connection is required, see if we have one.
+ */
+ if (conn->target_session_attrs != NULL &&
+ strcmp(conn->target_session_attrs, "read-write") == 0)
+ {
+ conn->status = CONNECTION_OK;
+ if (!PQsendQuery(conn,
+ "show transaction_read_only"))
+ goto error_return;
+ conn->status = CONNECTION_CHECK_WRITABLE;
+ return PGRES_POLLING_READING;
+ }
+
+ /* We can release the address lists now. */
+ release_all_addrinfo(conn);
+
+ /* We are open for business! */
conn->status = CONNECTION_OK;
return PGRES_POLLING_OK;
}
@@ -2814,10 +2835,109 @@ keep_going: /* We will come back to here until there is
goto error_return;
}
+ /*
+			 * If a read-write connection is requested, check for same.
+ */
+ if (conn->target_session_attrs != NULL &&
+ strcmp(conn->target_session_attrs, "read-write") == 0)
+ {
+ conn->status = CONNECTION_OK;
+ if (!PQsendQuery(conn,
+ "show transaction_read_only"))
+ goto error_return;
+ conn->status = CONNECTION_CHECK_WRITABLE;
+ return PGRES_POLLING_READING;
+ }
+
+ /* We can release the address lists now. */
+ release_all_addrinfo(conn);
+
/* We are open for business! */
conn->status = CONNECTION_OK;
return PGRES_POLLING_OK;
+ case CONNECTION_CHECK_WRITABLE:
+ {
+ conn->status = CONNECTION_OK;
+ if (!PQconsumeInput(conn))
+ goto error_return;
+
+ if (PQisBusy(conn))
+ {
+ conn->status = CONNECTION_CHECK_WRITABLE;
+ return PGRES_POLLING_READING;
+ }
+
+ res = PQgetResult(conn);
+ if (res && (PQresultStatus(res) == PGRES_TUPLES_OK) &&
+ PQntuples(res) == 1)
+ {
+ char *val;
+
+ val = PQgetvalue(res, 0, 0);
+ if (strncmp(val, "on", 2) == 0)
+ {
+ PQclear(res);
+
+ /* Not writable; close connection. */
+ appendPQExpBuffer(&conn->errorMessage,
+ libpq_gettext("could not make a writable "
+ "connection to server "
+ "\"%s:%s\"\n"),
+ conn->connhost[conn->whichhost].host,
+ conn->connhost[conn->whichhost].port);
+ conn->status = CONNECTION_OK;
+ sendTerminateConn(conn);
+ pqDropConnection(conn, true);
+
+ /* Skip any remaining addresses for this host. */
+ conn->addr_cur = NULL;
+ if (conn->whichhost + 1 < conn->nconnhost)
+ {
+ conn->status = CONNECTION_NEEDED;
+ goto keep_going;
+ }
+
+ /* No more addresses to try. So we fail. */
+ goto error_return;
+ }
+ PQclear(res);
+
+ /* We can release the address lists now. */
+ release_all_addrinfo(conn);
+
+ /* We are open for business! */
+ conn->status = CONNECTION_OK;
+ return PGRES_POLLING_OK;
+ }
+
+ /*
+ * Something went wrong with "show transaction_read_only". We
+			 * should try the next address, if any.
+ */
+ if (res)
+ PQclear(res);
+ appendPQExpBuffer(&conn->errorMessage,
+						  libpq_gettext("test \"show transaction_read_only\" failed "
+										"on \"%s:%s\"\n"),
+ conn->connhost[conn->whichhost].host,
+ conn->connhost[conn->whichhost].port);
+ conn->status = CONNECTION_OK;
+ sendTerminateConn(conn);
+ pqDropConnection(conn, true);
+
+ if (conn->addr_cur->ai_next != NULL ||
+ conn->whichhost + 1 < conn->nconnhost)
+ {
+ conn->addr_cur = conn->addr_cur->ai_next;
+ conn->status = CONNECTION_NEEDED;
+ goto keep_going;
+ }
+
+ /* No more addresses to try. So we fail. */
+ goto error_return;
+ }
+
default:
appendPQExpBuffer(&conn->errorMessage,
libpq_gettext("invalid connection state %d, "
@@ -3109,6 +3229,8 @@ freePGconn(PGconn *conn)
free(conn->outBuffer);
if (conn->rowBuf)
free(conn->rowBuf);
+ if (conn->target_session_attrs)
+ free(conn->target_session_attrs);
termPQExpBuffer(&conn->errorMessage);
termPQExpBuffer(&conn->workBuffer);
@@ -3120,19 +3242,41 @@ freePGconn(PGconn *conn)
}
/*
- * closePGconn
- * - properly close a connection to the backend
- *
- * This should reset or release all transient state, but NOT the connection
- * parameters. On exit, the PGconn should be in condition to start a fresh
- * connection with the same parameters (see PQreset()).
+ * release_all_addrinfo
+ * - free addrinfo of all hostconn elements.
*/
+
static void
-closePGconn(PGconn *conn)
+release_all_addrinfo(PGconn *conn)
{
- PGnotify *notify;
- pgParameterStatus *pstatus;
+ if (conn->connhost != NULL)
+ {
+ int i;
+
+ for (i = 0; i < conn->nconnhost; ++i)
+ {
+ int family = AF_UNSPEC;
+
+#ifdef HAVE_UNIX_SOCKETS
+ if (conn->connhost[i].type == CHT_UNIX_SOCKET)
+ family = AF_UNIX;
+#endif
+ pg_freeaddrinfo_all(family,
+ conn->connhost[i].addrlist);
+ conn->connhost[i].addrlist = NULL;
+ }
+ }
+ conn->addr_cur = NULL;
+}
+
+/*
+ * sendTerminateConn
+ * - Send a terminate message to backend.
+ */
+static void
+sendTerminateConn(PGconn *conn)
+{
/*
* Note that the protocol doesn't allow us to send Terminate messages
* during the startup phase.
@@ -3147,6 +3291,23 @@ closePGconn(PGconn *conn)
pqPutMsgEnd(conn);
(void) pqFlush(conn);
}
+}
+
+/*
+ * closePGconn
+ * - properly close a connection to the backend
+ *
+ * This should reset or release all transient state, but NOT the connection
+ * parameters. On exit, the PGconn should be in condition to start a fresh
+ * connection with the same parameters (see PQreset()).
+ */
+static void
+closePGconn(PGconn *conn)
+{
+ PGnotify *notify;
+ pgParameterStatus *pstatus;
+
+ sendTerminateConn(conn);
/*
* Must reset the blocking status so a possible reconnect will work.
@@ -3165,25 +3326,8 @@ closePGconn(PGconn *conn)
conn->asyncStatus = PGASYNC_IDLE;
pqClearAsyncResult(conn); /* deallocate result */
resetPQExpBuffer(&conn->errorMessage);
- if (conn->connhost != NULL)
- {
- int i;
-
- for (i = 0; i < conn->nconnhost; ++i)
- {
- int family = AF_UNSPEC;
-
-#ifdef HAVE_UNIX_SOCKETS
- if (conn->connhost[i].type == CHT_UNIX_SOCKET)
- family = AF_UNIX;
-#endif
+ release_all_addrinfo(conn);
- pg_freeaddrinfo_all(family,
- conn->connhost[i].addrlist);
- conn->connhost[i].addrlist = NULL;
- }
- }
- conn->addr_cur = NULL;
notify = conn->notifyHead;
while (notify != NULL)
{
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 9ca0756c4b..20b7e57de7 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -62,7 +62,9 @@ typedef enum
* backend startup. */
CONNECTION_SETENV, /* Negotiating environment. */
CONNECTION_SSL_STARTUP, /* Negotiating SSL. */
- CONNECTION_NEEDED /* Internal state: connect() needed */
+ CONNECTION_NEEDED, /* Internal state: connect() needed */
+ CONNECTION_CHECK_WRITABLE /* Check if we could make a writable
+ * connection. */
} ConnStatusType;
typedef enum
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 854ec89924..a2f85895a1 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -361,6 +361,9 @@ struct pg_conn
char *krbsrvname; /* Kerberos service name */
#endif
+	char	   *target_session_attrs;	/* Type of connection to make.
+										 * Possible values: any, read-write. */
+
/* Optional file to write trace info to */
FILE *Pfdebug;
--
cgit v1.2.3
From 11da83a0e70d32ed0e06a5c948cd8343f8ad5102 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 29 Nov 2016 14:08:23 -0500
Subject: Add uuid to the set of types supported by contrib/btree_gist.
Paul Jungwirth, reviewed and hacked on by Teodor Sigaev, Ildus
Kurbangaliev, Adam Brusselback, Chris Bandy, and myself.
Discussion: https://postgr.es/m/CA+renyUEE29=X01JXdz8_TQvo6n9=2XoEBBRnQ8rkLyr+kjPxQ@mail.gmail.com
Discussion: https://postgr.es/m/55F6EE82.8080209@sigaev.ru
---
contrib/btree_gist/Makefile | 8 +-
contrib/btree_gist/btree_gist--1.2--1.3.sql | 65 +++
contrib/btree_gist/btree_gist.control | 2 +-
contrib/btree_gist/btree_gist.h | 3 +-
contrib/btree_gist/btree_uuid.c | 238 ++++++++++
contrib/btree_gist/data/uuid.data | 703 ++++++++++++++++++++++++++++
contrib/btree_gist/expected/uuid.out | 66 +++
contrib/btree_gist/sql/uuid.sql | 31 ++
doc/src/sgml/btree-gist.sgml | 8 +-
src/backend/utils/adt/uuid.c | 9 -
src/include/utils/uuid.h | 8 +-
11 files changed, 1120 insertions(+), 21 deletions(-)
create mode 100644 contrib/btree_gist/btree_gist--1.2--1.3.sql
create mode 100644 contrib/btree_gist/btree_uuid.c
create mode 100644 contrib/btree_gist/data/uuid.data
create mode 100644 contrib/btree_gist/expected/uuid.out
create mode 100644 contrib/btree_gist/sql/uuid.sql
(limited to 'doc/src')
diff --git a/contrib/btree_gist/Makefile b/contrib/btree_gist/Makefile
index 5134f72611..d36f51795d 100644
--- a/contrib/btree_gist/Makefile
+++ b/contrib/btree_gist/Makefile
@@ -6,16 +6,16 @@ OBJS = btree_gist.o btree_utils_num.o btree_utils_var.o btree_int2.o \
btree_int4.o btree_int8.o btree_float4.o btree_float8.o btree_cash.o \
btree_oid.o btree_ts.o btree_time.o btree_date.o btree_interval.o \
btree_macaddr.o btree_inet.o btree_text.o btree_bytea.o btree_bit.o \
- btree_numeric.o $(WIN32RES)
+ btree_numeric.o btree_uuid.o $(WIN32RES)
EXTENSION = btree_gist
-DATA = btree_gist--1.2.sql btree_gist--1.1--1.2.sql btree_gist--1.0--1.1.sql \
- btree_gist--unpackaged--1.0.sql
+DATA = btree_gist--unpackaged--1.0.sql btree_gist--1.0--1.1.sql \
+ btree_gist--1.1--1.2.sql btree_gist--1.2.sql btree_gist--1.2--1.3.sql
PGFILEDESC = "btree_gist - B-tree equivalent GiST operator classes"
REGRESS = init int2 int4 int8 float4 float8 cash oid timestamp timestamptz \
time timetz date interval macaddr inet cidr text varchar char bytea \
- bit varbit numeric not_equal
+ bit varbit numeric uuid not_equal
SHLIB_LINK += $(filter -lm, $(LIBS))
diff --git a/contrib/btree_gist/btree_gist--1.2--1.3.sql b/contrib/btree_gist/btree_gist--1.2--1.3.sql
new file mode 100644
index 0000000000..726561e87b
--- /dev/null
+++ b/contrib/btree_gist/btree_gist--1.2--1.3.sql
@@ -0,0 +1,65 @@
+/* contrib/btree_gist/btree_gist--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION btree_gist UPDATE TO '1.3'" to load this file. \quit
+
+-- Add support for indexing UUID columns
+
+-- define the GiST support methods
+CREATE FUNCTION gbt_uuid_consistent(internal,uuid,int2,oid,internal)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION gbt_uuid_fetch(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION gbt_uuid_compress(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION gbt_uuid_penalty(internal,internal,internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION gbt_uuid_picksplit(internal, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION gbt_uuid_union(internal, internal)
+RETURNS gbtreekey32
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+CREATE FUNCTION gbt_uuid_same(gbtreekey32, gbtreekey32, internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+-- Create the operator class
+CREATE OPERATOR CLASS gist_uuid_ops
+DEFAULT FOR TYPE uuid USING gist
+AS
+ OPERATOR 1 < ,
+ OPERATOR 2 <= ,
+ OPERATOR 3 = ,
+ OPERATOR 4 >= ,
+ OPERATOR 5 > ,
+ FUNCTION 1 gbt_uuid_consistent (internal, uuid, int2, oid, internal),
+ FUNCTION 2 gbt_uuid_union (internal, internal),
+ FUNCTION 3 gbt_uuid_compress (internal),
+ FUNCTION 4 gbt_decompress (internal),
+ FUNCTION 5 gbt_uuid_penalty (internal, internal, internal),
+ FUNCTION 6 gbt_uuid_picksplit (internal, internal),
+ FUNCTION 7 gbt_uuid_same (gbtreekey32, gbtreekey32, internal),
+ STORAGE gbtreekey32;
+
+-- These are "loose" in the opfamily for consistency with the rest of btree_gist
+ALTER OPERATOR FAMILY gist_uuid_ops USING gist ADD
+ OPERATOR 6 <> (uuid, uuid) ,
+ FUNCTION 9 (uuid, uuid) gbt_uuid_fetch (internal) ;
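As a rough usage sketch of the script above (the table and data are hypothetical, not part of the patch), updating the extension makes gist_uuid_ops the default GiST opclass for uuid columns:

ALTER EXTENSION btree_gist UPDATE TO '1.3';
CREATE TABLE events (id uuid);                         -- hypothetical table
CREATE INDEX events_id_idx ON events USING gist (id);  -- picks gist_uuid_ops, the default
SELECT count(*) FROM events
WHERE id < '55e65ca2-4136-4a4b-ba78-cd3fe4678203';     -- indexable via the new opclass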
diff --git a/contrib/btree_gist/btree_gist.control b/contrib/btree_gist/btree_gist.control
index 74d0e92591..ddbf83dc32 100644
--- a/contrib/btree_gist/btree_gist.control
+++ b/contrib/btree_gist/btree_gist.control
@@ -1,5 +1,5 @@
# btree_gist extension
comment = 'support for indexing common datatypes in GiST'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/btree_gist'
relocatable = true
diff --git a/contrib/btree_gist/btree_gist.h b/contrib/btree_gist/btree_gist.h
index 191202aede..9b3e22c469 100644
--- a/contrib/btree_gist/btree_gist.h
+++ b/contrib/btree_gist/btree_gist.h
@@ -31,7 +31,8 @@ enum gbtree_type
gbt_t_bpchar,
gbt_t_bytea,
gbt_t_bit,
- gbt_t_inet
+ gbt_t_inet,
+ gbt_t_uuid
};
#endif
diff --git a/contrib/btree_gist/btree_uuid.c b/contrib/btree_gist/btree_uuid.c
new file mode 100644
index 0000000000..44cef64cac
--- /dev/null
+++ b/contrib/btree_gist/btree_uuid.c
@@ -0,0 +1,238 @@
+/*
+ * contrib/btree_gist/btree_uuid.c
+ */
+#include "postgres.h"
+
+#include "btree_gist.h"
+#include "btree_utils_num.h"
+#include "port/pg_bswap.h"
+#include "utils/uuid.h"
+
+typedef struct
+{
+ pg_uuid_t lower,
+ upper;
+} uuidKEY;
+
+
+/*
+ * UUID ops
+ */
+PG_FUNCTION_INFO_V1(gbt_uuid_compress);
+PG_FUNCTION_INFO_V1(gbt_uuid_fetch);
+PG_FUNCTION_INFO_V1(gbt_uuid_union);
+PG_FUNCTION_INFO_V1(gbt_uuid_picksplit);
+PG_FUNCTION_INFO_V1(gbt_uuid_consistent);
+PG_FUNCTION_INFO_V1(gbt_uuid_penalty);
+PG_FUNCTION_INFO_V1(gbt_uuid_same);
+
+
+static int
+uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
+{
+ return memcmp(arg1->data, arg2->data, UUID_LEN);
+}
+
+static bool
+gbt_uuidgt(const void *a, const void *b)
+{
+ return uuid_internal_cmp((const pg_uuid_t *) a, (const pg_uuid_t *) b) > 0;
+}
+
+static bool
+gbt_uuidge(const void *a, const void *b)
+{
+ return uuid_internal_cmp((const pg_uuid_t *) a, (const pg_uuid_t *) b) >= 0;
+}
+
+static bool
+gbt_uuideq(const void *a, const void *b)
+{
+ return uuid_internal_cmp((const pg_uuid_t *) a, (const pg_uuid_t *) b) == 0;
+}
+
+static bool
+gbt_uuidle(const void *a, const void *b)
+{
+ return uuid_internal_cmp((const pg_uuid_t *) a, (const pg_uuid_t *) b) <= 0;
+}
+
+static bool
+gbt_uuidlt(const void *a, const void *b)
+{
+ return uuid_internal_cmp((const pg_uuid_t *) a, (const pg_uuid_t *) b) < 0;
+}
+
+static int
+gbt_uuidkey_cmp(const void *a, const void *b)
+{
+ uuidKEY *ia = (uuidKEY *) (((const Nsrt *) a)->t);
+ uuidKEY *ib = (uuidKEY *) (((const Nsrt *) b)->t);
+ int res;
+
+ res = uuid_internal_cmp(&ia->lower, &ib->lower);
+ if (res == 0)
+ res = uuid_internal_cmp(&ia->upper, &ib->upper);
+ return res;
+}
+
+
+static const gbtree_ninfo tinfo =
+{
+ gbt_t_uuid,
+ UUID_LEN,
+ 32, /* sizeof(gbtreekey32) */
+ gbt_uuidgt,
+ gbt_uuidge,
+ gbt_uuideq,
+ gbt_uuidle,
+ gbt_uuidlt,
+ gbt_uuidkey_cmp,
+ NULL
+};
+
+
+/**************************************************
+ * uuid ops
+ **************************************************/
+
+
+Datum
+gbt_uuid_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval;
+
+ if (entry->leafkey)
+ {
+ char *r = (char *) palloc(2 * UUID_LEN);
+ pg_uuid_t *key = DatumGetUUIDP(entry->key);
+
+ retval = palloc(sizeof(GISTENTRY));
+
+ memcpy((void *) r, (void *) key, UUID_LEN);
+ memcpy((void *) (r + UUID_LEN), (void *) key, UUID_LEN);
+ gistentryinit(*retval, PointerGetDatum(r),
+ entry->rel, entry->page,
+ entry->offset, FALSE);
+ }
+ else
+ retval = entry;
+
+ PG_RETURN_POINTER(retval);
+}
+
+Datum
+gbt_uuid_fetch(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(gbt_num_fetch(entry, &tinfo));
+}
+
+Datum
+gbt_uuid_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ pg_uuid_t *query = PG_GETARG_UUID_P(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+
+ /* Oid subtype = PG_GETARG_OID(3); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ uuidKEY *kkk = (uuidKEY *) DatumGetPointer(entry->key);
+ GBT_NUMKEY_R key;
+
+ /* All cases served by this function are exact */
+ *recheck = false;
+
+ key.lower = (GBT_NUMKEY *) &kkk->lower;
+ key.upper = (GBT_NUMKEY *) &kkk->upper;
+
+ PG_RETURN_BOOL(
+ gbt_num_consistent(&key, (void *) query, &strategy,
+ GIST_LEAF(entry), &tinfo)
+ );
+}
+
+Datum
+gbt_uuid_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ void *out = palloc(sizeof(uuidKEY));
+
+ *(int *) PG_GETARG_POINTER(1) = sizeof(uuidKEY);
+ PG_RETURN_POINTER(gbt_num_union((void *) out, entryvec, &tinfo));
+}
+
+/*
+ * Convert a uuid to a "double" value for estimating sizes of ranges.
+ */
+static double
+uuid_2_double(const pg_uuid_t *u)
+{
+ uint64 uu[2];
+ const double two64 = 18446744073709551616.0; /* 2^64 */
+
+ /* Source data may not be suitably aligned, so copy */
+ memcpy(uu, u->data, UUID_LEN);
+
+ /*
+ * uuid values should be considered as big-endian numbers, since that
+ * corresponds to how memcmp will compare them. On a little-endian
+ * machine, byte-swap each half so we can use native uint64 arithmetic.
+ */
+#ifndef WORDS_BIGENDIAN
+ uu[0] = BSWAP64(uu[0]);
+ uu[1] = BSWAP64(uu[1]);
+#endif
+
+ /*
+ * 2^128 is about 3.4e38, which in theory could exceed the range of
+ * "double" (POSIX only requires 1e37). To avoid any risk of overflow,
+ * put the decimal point between the two halves rather than treating the
+ * uuid value as a 128-bit integer.
+ */
+ return (double) uu[0] + (double) uu[1] / two64;
+}
+
+Datum
+gbt_uuid_penalty(PG_FUNCTION_ARGS)
+{
+ uuidKEY *origentry = (uuidKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
+ uuidKEY *newentry = (uuidKEY *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
+ float *result = (float *) PG_GETARG_POINTER(2);
+ double olower,
+ oupper,
+ nlower,
+ nupper;
+
+ olower = uuid_2_double(&origentry->lower);
+ oupper = uuid_2_double(&origentry->upper);
+ nlower = uuid_2_double(&newentry->lower);
+ nupper = uuid_2_double(&newentry->upper);
+
+ penalty_num(result, olower, oupper, nlower, nupper);
+
+ PG_RETURN_POINTER(result);
+}
+
+Datum
+gbt_uuid_picksplit(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_POINTER(gbt_num_picksplit(
+ (GistEntryVector *) PG_GETARG_POINTER(0),
+ (GIST_SPLITVEC *) PG_GETARG_POINTER(1),
+ &tinfo
+ ));
+}
+
+Datum
+gbt_uuid_same(PG_FUNCTION_ARGS)
+{
+ uuidKEY *b1 = (uuidKEY *) PG_GETARG_POINTER(0);
+ uuidKEY *b2 = (uuidKEY *) PG_GETARG_POINTER(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+
+ *result = gbt_num_same((void *) b1, (void *) b2, &tinfo);
+ PG_RETURN_POINTER(result);
+}
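The uuid_2_double() helper above reads the 16 bytes as two big-endian 64-bit halves hi and lo and returns hi + lo / 2^64, which keeps the penalty arithmetic safely inside double range. A minimal arithmetic sketch of that mapping, using a hypothetical uuid value:

SELECT 1::numeric                          -- hi = 0x0000000000000001
     + 9223372036854775808::numeric        -- lo = 0x8000000000000000, i.e. 2^63
     / 18446744073709551616::numeric       -- divided by 2^64
       AS approx;                          -- 1.5, for uuid 00000000-0000-0001-8000-...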
diff --git a/contrib/btree_gist/data/uuid.data b/contrib/btree_gist/data/uuid.data
new file mode 100644
index 0000000000..df118d3670
--- /dev/null
+++ b/contrib/btree_gist/data/uuid.data
@@ -0,0 +1,703 @@
+5ad32d7e-b463-4363-a65f-52475d9fab27
+7787b66e-cf0e-48c2-b989-1e44e8a00891
+75ec8a55-afcf-442d-9e1d-f9d67a15caf9
+6f36f2e9-5e58-4961-9e8d-9c3ca1cfcd44
+78f6d184-f74e-4a38-81ce-a821e301e9ac
+1aa4bf49-dd76-40df-a86c-393fd202b710
+98559f1d-00b3-417e-bc57-9053545a260d
+ac24b4d7-1a8f-4abc-a02e-b3294497d18e
+a425d99f-ee91-419a-8cd1-6908b8c89679
+815f3632-4d1b-42e3-8dbf-68a20e7fbea5
+ae212041-e64a-4ff1-b8c9-b922fc3f2087
+8e580963-0584-4391-9f1e-3e607d435bd8
+9348cf2f-fe8b-4f05-829e-e2f732482bcc
+807b3ff5-0dac-46e1-ba10-07753ec4c7f5
+93325505-83f0-41f4-a060-0d6e1a166815
+cabeaad4-0096-4bc8-8d29-29623d3658c9
+8edda4dd-07fc-4457-b9c0-a28a97cdf9ee
+4b7b09ae-d11d-4d54-aad2-c7dcbce92e04
+3ba8ace3-be63-4a11-b057-8ede07d49089
+70dfc341-127f-4d54-a8d4-8159eb119a8f
+\N
+f4910786-02d1-4874-9be7-c2cd5774fa1e
+61a8407e-91f4-41f4-8050-32c8659839ac
+d23c6778-d021-426b-8435-e7ee7172a456
+\N
+77b3bb98-f8a6-4dc1-b2db-174da47ef88d
+4ed9962e-9f7b-4bd2-b791-62c87e7e0f32
+\N
+bf30fce0-2497-4594-94e2-5dabb58fa3d6
+289dcd8c-8046-4748-b1e7-3afc51e3791b
+622aa432-d5e9-4d02-8a3f-09813dcc00cf
+f95c69c4-7d22-41c2-89c1-6bcc6835d478
+2b4ed1c1-38e2-495c-8fba-f2060b983f8a
+05905429-70b8-408b-a9b1-b8f00522ec7a
+8b1dcbd2-10ca-4a37-a080-a1c846519370
+a6225a20-25c9-4afe-bc33-e6f600eb57a3
+b0121e3f-157c-4c8f-acf3-78dfc752bcb5
+\N
+\N
+11f510fb-ae06-4542-b936-cb5713908e10
+\N
+26293032-dbd0-4914-be29-c060c5adf98b
+d3d0b0ce-b60c-4864-9557-ba4a9cca8b1f
+16c70611-bbfd-4a46-95fd-3c9e12ff4641
+27dc15a1-5518-4310-bb31-52da9f148afc
+a86c2830-282a-4a74-8b5a-e5b562325d82
+8afadc39-3a85-490e-a31e-18d5e787a639
+1159af7d-4d72-4651-a013-9de9f3f03002
+5835b0b1-6a50-44c2-ab41-81fa34a29411
+\N
+e031ab87-ccb2-496c-9eba-9966415f82d7
+\N
+67b1194b-b700-440e-aa35-93f118707632
+a9a88b6a-c2f0-4de6-ab99-b34853aae425
+c9e3aeda-2001-4ab9-aaf1-57b33d841960
+6bbc3fae-1495-4178-8445-04960dd97c56
+bd7e52f5-4362-4447-9f7c-da14ed54266b
+145fd346-4057-4e91-aa82-594d7a16ebce
+\N
+029d9c72-77e1-4951-b185-30574d00862d
+d9233d92-3491-4985-a6bb-763f177535a5
+abe1ffca-8325-4041-8acd-b08904f76cb0
+0c8afca8-f720-4c4b-aab4-e061011db5ca
+4f636f73-71e8-4b14-b4c5-25233d115421
+f05a62f1-f495-4ea1-9e20-433c67d2c94e
+a21ee4b2-e9bd-4510-9b74-0bac543fc378
+a41bf7a6-92c2-48f1-80ef-0c3d4e002b5f
+ed6b5777-b1b4-4e85-ab12-ce1f8054ced1
+04b9cc30-6d01-4f5d-9254-81eb78fbb4af
+a89561d0-88e2-43d0-9548-35be00b1f028
+c77ad187-1ed9-4b25-a474-35bf8481cf25
+cc4cf4a1-aa64-4916-ae50-5fa7bb5c3b3b
+\N
+70337711-dd30-4454-bd6c-0e8f77ba34a0
+1ed02b49-aace-4c5b-a1cf-bc20e2edf03c
+84b0f797-baa6-448f-ba29-0234e64ac30b
+40556603-648c-4359-9c4c-5e76f21efd1f
+af21ec76-27d6-47d8-9ede-1b08049410d3
+0f889e8a-e3b5-46bb-b222-e91cf32623ff
+3f5403c1-16b1-4a36-bf4b-77333226e8e4
+5c5ebc3f-aced-4036-9b4a-59e19bd9c74c
+8acec555-d518-422b-b416-19a2c5cbf825
+ab1effba-c614-4af2-9c9a-df8f6566f427
+4ae0722c-1c55-4ef1-a9aa-ddd9c43f3dd0
+77e2f0bf-cff2-4128-92da-c6d4bc656267
+2e643fa5-23db-4073-8dac-6f714f823af0
+d6624eac-e776-4e18-9d3c-d6c265a8c378
+\N
+623353ca-0675-4a05-94cf-7bb62e49aece
+2d29a192-0272-49fa-99ea-9b30e81f0b47
+54cf75a7-6ece-4e20-8f24-036f3daa2fa1
+8964987c-bacd-4474-83a7-2ea51ac7ff3a
+87f3e366-6f70-4304-8789-e03f94adcadb
+603c2f61-c91c-4f7b-9de4-ad43ea68b00a
+fd036a77-ee2a-4c33-a985-709d8667c1a2
+be90eb3d-c147-447b-8ff1-2a62aba6ef3c
+448ec399-4249-4daf-85f8-23ad8cc7fbdf
+9788716f-1852-4b37-a2c4-f4f65d4eeef6
+bed0413b-64f7-4cf3-b2bd-765b10a31ba8
+6add9145-07d8-4d1d-bad0-57c2a914b90b
+\N
+caf27337-384f-4f51-92ce-76bd5dbf7317
+0f73ab51-4dc6-4cbf-b74f-faced034c866
+0b8c972d-f1e4-4150-bd9e-baee71af1ca9
+0567dd6d-0386-4002-849b-591d1512cc1e
+4c783517-29df-4efa-8f40-12150943e6ab
+1be634a9-77cc-4e17-8b24-817067dd2235
+ab233cca-286f-4977-ab03-0c9870b17934
+88d77def-4019-4b0d-8de5-47f3b84b4c1f
+036f53a5-5270-4844-bedb-251512538a33
+\N
+\N
+afca4ef6-56ac-4bbe-8c44-f54858b55ab7
+8613d2fe-d9b6-4491-a70d-8de3ad41b4df
+f084c252-43db-4735-bb47-eb45741cedf8
+d35f523d-4c8b-43a7-8839-2c5567ed4934
+40677618-afed-45cd-9299-ff715494d56f
+7a5aa5c4-884f-460f-a9f4-a0033eb82de2
+26007d37-61cc-4386-a85e-08e56de23a2c
+3753e811-da3f-4a23-92ce-1ae502c99195
+2dfb1eb2-56bf-4b0f-b2f8-3ec2dc2b0191
+551d2a52-f84a-42e4-aa2b-84f7000a3926
+\N
+\N
+b9cd6a67-e12b-4a0b-b935-92e267ea776f
+fd4ec635-a1e5-4fcb-89a3-57bf8aa408b8
+6599e40d-a602-4a54-a24b-a694432992a2
+\N
+55e65ca2-4136-4a4b-ba78-02ecb8053cf3
+c5b2bc4d-acf9-4b11-ae9a-02ecb8053cf3
+34bb5f60-3e47-445a-acbd-a5889334f4d5
+743008b5-0779-4d50-8662-1c5988335c33
+71317e40-193f-4a7f-a94a-a188847b0249
+23f14966-adfc-4460-84fa-14190d33d55f
+d1bd4bba-8a17-4e8f-93d9-ed09f4719ea4
+b5964792-f607-4cbe-9759-3e459a90fe5e
+c49c96e9-77ec-4b75-a3d3-00f9fa8c9ac2
+842f694a-426c-45c7-a7a9-5d750adbdacb
+c48446ee-c9f6-4038-9ca2-e32f8706c36f
+fd0ab627-022b-4fa5-bb9d-0ada8088c9e6
+b27dd1a0-7938-4e42-a9e7-93672a7d85a1
+\N
+5ae9add1-28d5-4fd4-bf1d-f5a3ebabc31e
+55e65ca2-4136-4a4b-ba78-cd3fe4678203
+25706dc9-61d1-4c67-be11-22f953132d48
+723f5921-c592-40a8-955c-7f46995b8173
+69c29563-77e3-43e6-93e4-d6ed321a294e
+8d6b7ea4-d062-4fb9-a20a-d718abd9e331
+b1e945bc-2a8e-4d1c-b9e7-322b95d31b86
+4ef84213-0b39-4d86-80bb-14d2376f77cf
+fd3daf86-96e0-42b7-b83a-c2781aa28e4d
+ad6c141d-e257-410b-8596-77153d37b428
+40dd6844-7b56-4f39-a7cb-c00ae3870108
+883559f7-e3af-4f44-aff3-a4bd906e5f86
+98c04f9a-d3b7-4d21-9f89-43ef124d48fa
+\N
+55e65ca2-4136-4a4b-ba78-698a09529f02
+ee145a4b-6016-4914-b947-698a09529f02
+931473e1-2cb2-4951-9ab0-039396dc2ccb
+0df573b5-3ef9-40ef-9b05-208e649965d1
+b13a6d18-33c9-4aa0-b167-88542cd5d8ba
+73cbd742-37db-4497-9a30-e10279e9cb10
+63a68ba8-e223-4db7-b56f-459d25d3f873
+aa2e9b1e-e09c-4848-983e-aa4adf34938b
+2d58bb19-ac14-42fe-b427-59798b8d9fcb
+5d3b3f34-ce9a-4ee6-9400-c7bf7576ffa8
+41945dd7-9fb0-47af-8ee9-10e5a530da3e
+\N
+c3a9b409-4e0c-4426-8a3d-a87236d189bd
+84da10fe-818d-4a3b-85fc-d4c169ca92bc
+12a7071d-902d-4438-9c40-c2cd875b51f7
+2e7e60fb-ba46-4005-b6b5-e3630b699254
+9782a6b8-d676-45a8-8991-1c44e60764cc
+6760f7e2-6b5a-4ce7-a66a-9014068fc8eb
+\N
+\N
+2dab7131-2ba8-4def-9b28-00c0b1ed929a
+9d3df737-16fd-481a-bde6-12a08a01e2f5
+d8390c84-1e90-4759-bd72-2629ca4c51f4
+0c1d5e42-5070-4e51-8076-8e8c15148f99
+9374026b-0ec3-4c44-b656-36520d322eec
+\N
+\N
+ad0aa9a0-122c-4025-b889-70778af8e6e4
+948d0dea-5d30-45da-8867-09d2e1fbedb9
+d295f644-5f4a-4808-b966-b14e73d9fa2a
+2de79a17-f91b-4310-ad74-dfa0729ff94d
+\N
+23eda39b-4f4f-4355-aeba-72771f922d20
+bccc111d-6f15-42c4-95c2-2c3d3329e83c
+c4f864e2-f81a-4b65-bd7f-cac745bd7f32
+10447976-1ef1-4599-85fe-d837074c6ea0
+8eecc1f2-77b2-4058-811c-11efb47eb662
+98ad70bc-39b0-4b33-a104-241c6970c4c3
+15c04e8c-69d9-44b7-a177-d08c5b181045
+c6de918d-6536-47b3-b80f-80d049cfe550
+548321e5-cde7-4d74-9bb3-929244806e13
+7e046db3-d1dc-46b3-81e2-fc867ee83298
+5dd5cc44-4ad1-4658-9b4f-509c35a4976e
+1aecf136-7583-4d31-980f-7fa5c4610e50
+ddb79c27-a80d-483a-9a52-c61ae2f4f93f
+e7aa3080-4d09-43ae-8a2b-5b539ffef909
+aeba0aac-6b8d-4d8e-a5b8-a37359c457c6
+64b5c8e8-118b-414f-9da3-73286249243c
+8b72b6d2-d3bb-4e74-a79f-577ff8767777
+dccd9254-24b4-4269-bf07-cfbbd349df54
+51cb6ab8-06c7-4280-9e0f-b2b9dcf026a3
+1f3d16eb-895f-48dd-a9e2-18eda41dacea
+3ecc08b6-2b67-442c-92ef-b74af4cb4f42
+b55c7f4a-3d05-43b9-846a-23b86a73d202
+4f54dfd8-39fd-4178-bafa-97b34f131552
+d6874b48-96c0-459f-8dd1-7f7dcbc44b22
+93a213c8-6e28-4eb7-8a94-e1b18e8bebd6
+0743014a-e4a0-4eb9-a9ab-23b0d0d96782
+a7ffb6c6-4e3d-4d08-9cbc-297d8b1bc29e
+45e788c4-4054-4d80-886a-e4f006936b7d
+c51c64be-f130-40c1-95b9-adb39ef25385
+3af1d3cb-df6b-4d26-910d-a314a57f8550
+\N
+477c1cd3-e9c9-4252-bc7c-4db57368ffc6
+b49ecde1-7413-4d6e-80c1-5df680d25617
+1f127009-eb85-4e7d-bfb1-1d804d86609b
+\N
+25e6a224-70a3-44dc-8faf-50feecee3480
+79094fc2-36d9-4521-bf36-0f6e0850fb6d
+cb9b1ab9-96e3-4ac5-aa51-8c3c47437893
+c09d4ab8-5f03-496f-ad31-4b9e7348b9da
+d5ab17e1-7017-4fbf-a30e-73a97a85a852
+\N
+8c937920-7219-43fe-8436-97aca339a5e5
+b8d001a3-036c-4f15-846d-32e229d66985
+bf2cae32-9096-4ae4-b0c7-efdafdd16e0d
+d916d001-fc9b-4e05-985c-b172b873c217
+\N
+\N
+e2c40a42-ef79-4754-a089-e254345896c3
+953fb2d0-d53a-4cc9-ad48-547263b85399
+8e2b1b14-1c56-48b9-a2b6-7868b42e6efc
+\N
+\N
+76b7f2cb-00e9-4936-bcd0-ebf1db9c6fec
+e24d88fb-6219-44c7-9f70-d34e0b274cdc
+fda48ed2-113d-4403-b2dc-57f7afe25842
+9aa44ba6-6c48-418d-8689-eab1112b3fbb
+42936d25-09a3-4af5-ab98-8c1216c5eca9
+\N
+a9ddecb1-b762-4cda-a785-cce77c51c67a
+f733a811-b83c-4a29-a59c-786825ea2070
+95e94fe5-eea0-4656-bc34-15724a4785a2
+521c55e4-3a5c-4064-b42f-0e8023af9c94
+a3379a9d-4b32-4956-a814-57b46db4bdc2
+0e46cef6-49ed-4a54-a6be-debcb7546583
+\N
+1b1f4424-5c05-4ee6-8eb6-1deb677c743f
+\N
+caaf024d-ef32-42e5-8875-6fe563e7e18a
+\N
+98e9ce77-8677-44aa-8703-4633dd089225
+66037e2f-634f-4096-8823-9ded5cb533bb
+\N
+f6726b9f-8a0b-498e-8479-3ffa7af0cd62
+\N
+4fd27a3c-2dd1-4f3d-9f48-a50407e9d1a3
+75737e5a-a509-4239-a627-99e430a4c3d1
+6a8bd5d4-7545-4dc8-86c3-003321cfd437
+73e26e64-baa7-4a9b-8833-8bd822c47146
+\N
+d6de86fd-3fae-4de2-9082-e86e53b6ad7c
+1143e251-091a-408f-b7b2-c12d05b42c2c
+4cc745fe-4667-4041-b941-4a3a23f9c727
+ae7b16f4-9ba8-4dc0-aae7-f78258d4993b
+38c5d6b2-ae34-4054-aa07-9a689afb23e1
+3421dcd4-553b-4ba6-b661-5cbf687b982a
+\N
+75ab9d18-5ba5-479c-b3cc-bcf3c41618b8
+c5a768bc-7765-4973-bd8b-a9528279a0a7
+e7b98405-47e6-404d-ab16-bb76fcca2b1d
+\N
+6b764c11-a1f1-4ae2-ab87-88bdf7597cbe
+\N
+e439cb96-6502-4207-a0c8-2fa6bf583bd2
+\N
+0d3089c0-3e26-4425-b48a-3e8f18c8b5a2
+1a625bc4-f83b-4735-bf01-a9f748080fa5
+1d1f382c-c702-4c59-84f1-4b4e2f01d1ca
+8d3b094e-112e-4535-87b3-04272426a956
+616f4879-2a33-4a5f-a776-08462d27ee1b
+e2c83f19-3163-487c-8fc1-7fc00d40ca75
+dbf94f48-5f01-46dd-a13d-def8f9dbb6e0
+8c37b9f1-9766-4a5f-a222-eded294dad76
+\N
+4cb9a4a8-e4bf-4757-9d68-36a3e24d7b6f
+071ba528-c84a-45fc-8ac5-2368e6c640ae
+ad34c9ca-0a4b-4c23-b64c-ccffa1f4dcd8
+4915fbe9-84b6-4bec-b6b4-50ff0cc49e9f
+622ef77b-e22d-4ca2-850b-298abb39d870
+3e1803c7-8109-43f2-a742-b4ae6e56be55
+\N
+0de0b43f-3e59-4060-a43d-baee17dd2c57
+\N
+1a8e57f0-e65b-4e55-b46a-207825e00287
+\N
+\N
+1877c37c-3567-46e6-9afe-222ac3ccf36f
+8f4bab7d-9454-4081-b1af-8e4956ab940f
+c1d23aab-ce09-4afc-a95c-b9b0f7b9c16a
+04761b0d-40ce-4cb2-a7e0-b1f6d582be26
+a29059f5-d4bc-446f-b903-6f09ec57a410
+4f9fcf12-54ff-442e-a7e3-6aabf45fa474
+facf76d4-e82b-44a3-9d7c-50a84395a2dc
+\N
+84484041-8e47-4349-91d6-418e9b584750
+44b949a1-502e-4309-af86-90bdfb04a085
+c5da9f0a-cca7-4b5b-82c2-1108e10989df
+98e487b8-9527-4fcd-a20a-812c909f1342
+e950bf7c-280d-4774-8891-6cf6da8c8ab4
+d5a28af5-f357-4de9-a8e3-3518a8ca5556
+ae450238-e659-4253-90ee-afd66a676515
+294509e2-b4e1-42a3-8982-b7dca385e4c9
+9546a087-6cac-47d6-b55f-3828df9aec99
+4a715abf-18d1-4ffc-9ea3-fd3f069502d9
+e0e07aef-ce8b-4499-befa-430724707d66
+4df8c6d6-278e-4ba4-af45-7240805ce0a0
+1637abb1-1d05-4055-98fa-296fe8f8bbf2
+e3850663-b73e-4f0a-8107-0775825783ef
+7b2dbbbf-2c82-49dd-9383-0edd1aee6d89
+2b99818e-2c53-4ab2-8f78-29bdbaa208b2
+22787985-d2c1-4af4-8b90-7bc441123952
+153166fb-923e-41d4-8413-edad8959a8d5
+732fab66-3055-49f8-807e-463fae914988
+1c39ccc7-2b24-4760-93d3-3e687ed04e1c
+33f3d119-ff2a-4341-8145-b69f67e11192
+0370cd89-a0c1-4d8c-85fe-3df1eddc734b
+d5a5e95c-e99c-48f6-9ebb-765651b893f9
+3e73a023-7b53-4323-a1f3-511809d322dc
+44d2a25c-933c-4345-8d19-cc52daaa9f11
+fc25f498-455d-4a13-9ca4-9ed2b843da17
+f9737152-aa7c-47a3-b96f-d0560ccea84f
+\N
+c1140446-9885-4a23-a709-593c0ba3818d
+050474c6-e6f5-417f-bcb8-2cc288cc412c
+9f07cb2f-aec4-4108-a07a-ea51366043f2
+4d9cf071-119b-497d-88de-0d05d21bac34
+7f5de1dd-3848-4ad8-a0e6-fb4039eec4f8
+f97c4e03-e8ce-443f-846b-82485c8bf04d
+2354e08c-8415-4b24-8cc2-b7b7616b6588
+b0a5e7da-9382-4a9f-b64c-0c4e5c3755ca
+c4812583-5689-4384-8044-e200f796afee
+890b609b-ac04-4bf3-9070-492a5db11e1c
+b0789be3-59ad-44e5-b1c6-394df496ae1f
+512ee0f6-380b-4758-ae20-b10d578473a9
+ad84303a-90e0-44dd-a7a2-b6015e748067
+986947bf-e924-4fb8-bdec-bd91f1b9d634
+fa576306-7eb6-411d-aa92-9e67678530ee
+\N
+3d66ae17-f27e-45f0-8b95-d47821b1d09c
+e8f40639-2c3e-4802-96e1-b8ccf15762f1
+482c9541-9fa9-444f-8155-ceb86056f2f7
+f8dbf3c1-c67f-4a71-8bb1-b754d2fc4d9d
+4764906b-3b71-4ccf-80d2-fb985af7728a
+628312b0-5f3d-4856-a3b0-6e7f60dd19ee
+37b5c576-d2c0-40b5-91cf-712b41dea919
+e7eefe4c-e647-40ba-bde1-bf4aa780b0ab
+\N
+4089198b-e1f1-4cf5-aa74-1e32aef1f034
+\N
+5133a3c7-b9cd-4659-ad99-0453b433f237
+7d6c0ec0-0c7c-4a98-9d75-74a937b7ecba
+74faeb7e-e0ac-4505-b2c5-bad6a39c6abb
+ad895aff-3527-423b-bc82-607d9586f5fc
+19a14c87-ab30-4747-8dc4-7599b4015960
+44955907-c727-4cab-aec3-464332e444fa
+\N
+b5d4dc6b-b65b-4a36-8f16-cf9760631718
+\N
+49b4a368-b530-49ec-b8ed-7664c2dda4cc
+e70020b6-eeac-425b-a3ce-9f5ad642f371
+448473ab-2a0f-4200-b7a7-b7583002779e
+\N
+\N
+8a8b74f9-f49a-4f77-b6ab-2011df5645ca
+\N
+67ae0fa2-08c6-4566-92c4-adae5be3c3cc
+1453d200-133e-4df3-9723-eb43bd21d896
+a7f3072e-e567-4e23-9bd0-70aba0554281
+f9caa2e9-6d43-4559-a9e7-6e5d7e7b2769
+b6a0b42b-70a3-42e0-b623-9dee8e2d3b85
+\N
+05f2c97f-4c81-43a2-9c7b-8a1cf8de2474
+9287055b-ef1d-4b7f-bc28-fd637adaf530
+e0ed08d4-2521-46b1-983a-03c3cf915e42
+285a1259-f929-4e37-b25b-62af93fb1ea1
+d76631da-ace1-472c-a23d-7d4f2702f771
+80f89372-02be-4ad0-a1e5-9a2490769427
+\N
+3c1043f7-f77f-4788-abc7-5615804ccd69
+a3942f4f-27f4-44bf-bf28-6ae854d4d346
+3b741249-a9bd-452b-bd08-9ae337134f13
+5aea4b8f-7dbb-4b7a-ad1b-cee1d93a5393
+cae01e8c-e75d-4c3d-8d90-ee3ebdb011d0
+13aeefb0-dbba-4cee-b108-931f23e286b1
+\N
+40aee193-6c24-4a13-a004-9f4dec1ab2cf
+2b731fff-597d-4a6e-888c-2ec72fe0dbef
+4581b196-149e-492e-9053-5040207dcc19
+68d07598-261e-40bc-a2f5-e8f72cc86104
+94c1ecf0-2bdc-4d0f-962f-226a9617b8cb
+2fcfe646-edc5-4397-8032-c4b4cd88afce
+2ed39277-375a-4e9a-846d-660fe531bed2
+1244efdd-5d49-403a-9649-2550abae81f2
+281b757c-a039-4668-adf1-ff020ecf17ae
+10f75609-865a-4b80-b5b2-39c67aa70c33
+8fb26a73-0535-4603-961e-217353617786
+98a14b8f-9a24-4c1d-b823-26d07b3d0e30
+200890d3-e23d-42dd-85f2-e9e4961495e6
+0faebfcf-6202-4799-b302-40c258d546c3
+714a3c57-cfdf-4db4-81f7-8bc0b9119f51
+c4f0d33b-3b8d-46b7-af89-2f5cce9d495a
+\N
+0a00b315-9668-493d-ab38-48d20cb5756f
+a0a20648-1759-4330-94a3-e39746fdb30a
+02890263-b147-4323-b59b-d533ea9d436e
+ed12380e-14ff-4ec2-a47d-1b57dcd9cd68
+6d23b5ae-ca06-4aa7-a282-d96315c3ba83
+dac7a9bc-97e3-46e5-a543-ee4071ae9f0a
+65995243-a887-407f-b5de-f6b25b07e3a5
+4831f146-46fe-4d57-9569-80852b0f655b
+46037b99-bc54-4266-94cb-384c062346e4
+10661769-8d53-476e-9be4-440258481fdd
+5ddd5380-3f03-4a76-b682-b65e1ff1a431
+5e1fe5a5-14fd-46e9-afd2-54908de1582b
+62c070e4-7647-4c21-a4dd-cd8203ee6b20
+\N
+efd4c5c8-629d-41be-8982-e3b8352f96e2
+16a2c85a-f455-45bc-8a29-38f7f664fc7c
+0360c257-6ea8-4d6e-834f-43f37a7d8f4e
+e2562225-53fe-457e-b538-c089d3946aec
+ddbaebbe-8294-4f8d-8452-4a46d1b43c53
+dca63b45-648f-4b4c-a36a-c53f3e0abe28
+04e91983-21f7-408b-b4eb-aa6d9359f37a
+905c498a-4b99-49af-89cd-fd1d022193e7
+413d7f85-6bdd-4d50-8859-afd317c841c4
+412ca3ee-90e5-46ae-adf3-80aa6ceee633
+1a8020e7-8671-46e4-b2d3-705e206723bf
+9414f47f-088c-4fb1-98f2-bc020b0d550b
+8f48e9ef-ae2c-4d0a-acc6-8d4b18622df5
+614cbf04-fb3b-4678-834b-da05f70bd529
+0c96ce68-9135-4199-a351-05f9dfc641e2
+f3dfa1a0-c156-435e-9f2c-662c345b92db
+\N
+d7c6b8cc-2d67-41c6-bae0-3ab23f8ad65c
+b8ae6503-dbed-4455-85ce-6b985b4338ff
+3ba1530c-911b-463d-aa61-5d81850b5fcb
+\N
+0f71da52-80da-4bc5-88f7-013603f8ef06
+f59318ec-1851-4beb-b02e-6e9ca7f2391a
+8b712321-af06-4af2-8654-1e174851ae59
+\N
+\N
+c30e01ed-17eb-495d-8381-dc87cd280002
+10537620-20b9-4706-a1fb-ec470349e4d8
+bc669e4a-bef3-4635-a3c6-47e70a307e70
+5d965491-8d0d-4f7b-bab4-b615dd97dbcd
+fa76b0d2-1c46-4855-9381-3ec02b21b475
+311c71c5-e5e2-4224-aa57-fc79adb0d037
+82f18b3d-dd3c-402c-a54b-afe92a8b4582
+8a39cdbf-39c5-41a9-a4cb-c334cccc0414
+396ed0f3-8c28-40a8-a5d0-b41d2448c618
+4dc37b03-f161-4436-bb41-3e114f78bb96
+1de1ff4c-5b47-40d2-a002-ff331900c4ec
+7b248f50-920b-45f3-b20b-19d75590ef3d
+\N
+eba46805-9b82-4ebd-84d2-5aa6cb3d8a48
+2fef1c4c-d97c-423f-923d-cbac15961fc5
+7ae4af7a-3759-4ecb-9d3f-ed5e124ab08c
+f812e63b-20a4-4f58-90be-e6c7357d89fd
+cd91b9fe-5daa-4087-94a7-459c54d24d39
+54f4f7a3-c581-4bff-9bc1-82d8aaec2d3c
+\N
+d900a862-a0b2-4776-b418-af075881c53d
+593143fa-99b6-464b-b563-33e201668db0
+1d8ffc5d-1011-41d5-b3bc-18e0fe5b7375
+93773b54-be94-4b99-9bcc-e181f1b09978
+\N
+7fe6a809-a67e-46ef-b686-4a982a6f6fc1
+00c65908-2e09-4974-8c61-37ec926e74fc
+d6f2cc12-6d93-4159-b247-70db9120217b
+29806fd9-24bb-4e50-a228-8ad6c17559ab
+bd093e48-01be-4a09-a8f0-33a2bfcf23a7
+13d3db20-68e0-4cb2-8530-90648e6a756c
+b5bb9551-bb70-4589-a12c-15350d85232b
+6736895a-671b-435c-85b7-133c65b09cc8
+a609184a-9035-4b75-b10e-838465bace14
+98c084a0-9c30-4dc0-b8a2-2b818f650034
+122afe72-60e5-497a-92f7-c8139339f999
+6b2b6d77-f49c-4b37-a57f-c8ab6f8deff3
+6e133f18-5a70-4717-a750-1c2ee9ab459a
+\N
+\N
+96e2dc30-cc7e-4c9e-bbbc-e4af1ce9b5f6
+0a430277-f67d-44df-88e6-3ae2e78b8a1e
+15445ff0-2087-4fa6-857e-baba197a3ec9
+5a8aba7a-1feb-4acc-b57d-2520233ec15f
+a17ef384-5204-4240-a493-7db5dc28a6b2
+bfa0fd7d-7d18-4c57-a066-c94be05d0730
+4de53a8d-d76e-446e-9b68-48618314f2c4
+21cb20a2-36ab-4756-8925-cf8bade61148
+a2376936-9836-4397-a3eb-e779e498ab2d
+f4b3d211-79bb-4256-a460-26dac56d9755
+4c4be10f-5141-429a-9ea6-24eb1fbd5330
+a45bb987-b935-44ae-a410-c82b1f895eb5
+b341e29a-e069-4ba2-b2ba-279b53c1fcda
+68a4e4f8-9d7b-48f0-8614-e2a2072c859e
+dc20e7f1-b79d-4135-b90c-cd87a265169a
+1c5bfa73-8814-4f5d-9718-a12417814c41
+3000c028-3656-4455-b095-0b9f5ab1dc9e
+\N
+3d13811e-7b7a-4779-ab87-5257a1c702d1
+a9ea4067-e53c-484c-87ae-bdb218fbec40
+9c3925cc-4dba-4dd4-8000-c646f45db751
+f2e7334b-9840-4e9c-aff4-d7ac58ecbd91
+889d6968-a515-4458-a353-4a3d8d7528bd
+dc02f6d6-fec3-4c2f-ace2-6a124a61f079
+9f48ed94-313c-4607-9c23-d3a1b20eaca4
+\N
+\N
+2428f056-4dc2-4db1-a110-20bb54a3037a
+3cc8dd23-fd3a-4855-8ffa-23d1efa4fdbc
+bd3e2f1d-5869-42ff-b1c5-3f65ae2d1974
+\N
+\N
+8330c73d-0dc5-4caa-be02-10e136137804
+f202e559-6ab2-4b72-a6e3-1bf16cfe8bf5
+54e42957-25fa-46c7-a939-eaaa4b54a5b4
+\N
+7c3b2d81-44ca-49d1-8b08-c33f691c4f3c
+\N
+5083bb0b-7fa7-4ee1-8e51-ce20ee53a16c
+ffdd64de-2c27-4858-8baf-b179f0fa690c
+6e4632ce-f908-4c13-a15a-ac5cdca38c76
+e428a015-0cca-4b09-bb5c-ee4bffbd2de7
+\N
+\N
+d7a270ce-7ac4-49bf-a531-e56960f56850
+80b82b49-3984-4b6f-8690-7578f992d987
+824d6c6a-273d-4bfe-8ca6-197c8477d6c1
+\N
+\N
+5dddc93c-c4c9-44fc-8916-a826089245a4
+21357386-e17f-457e-93b4-77295904e67f
+f9598cd5-3c4c-457c-b6d8-11049bbc94b1
+05f7fbbb-1660-42da-a154-de4aa6cce4df
+a3b6aeef-e8b5-4692-944f-eb5edfd6a0b5
+cb8f1dea-fefa-470f-9a9c-1e169df844ec
+\N
+4b12759f-10e0-478c-b2d4-7c71be9f837e
+3688c161-bc5a-40a4-a9a9-6854b623a139
+\N
+a6a6ac8a-b805-4f15-bc8e-71c3679221e2
+ddcdad12-0919-4b8b-acc8-e775aaf6b6a1
+0dcce500-a4d6-4d34-916b-686cc04ceaa9
+2190bba9-e7e6-413a-85e3-40735e791c1b
+3f06e070-1530-47a7-8898-a94d82ea59b8
+bfb7ef50-9a5c-4341-a65d-c7b9ccb76d39
+70972a38-8f23-478c-abfd-9dbdca17dd01
+3c6ba50f-9197-4f0b-bf46-ca51aef246d1
+fb4598bf-ffa0-470e-b8eb-2d704fc08bd5
+fb2bd46e-6f43-4b2a-a122-dd6539ccc03c
+49f8d0a6-b7b0-444b-90dd-1cb5d08e95e5
+\N
+e8b02af9-8671-49d3-a1bf-86e222fe4ecc
+\N
+26c6af97-9ffc-43bd-a926-da45469c3c52
+773ec08a-9d02-4a8f-87e9-f4460d703952
+286f3446-6e5c-4b91-bb47-d6106346369f
+\N
+2e1c1f21-8cdb-4a33-96b2-85b3eec11e41
+d7b3ebc0-62a0-419a-a710-f9950d012f92
+3edde810-79fd-4f1f-aa10-4aa472d9384a
+a13013f8-e1cf-4902-9c95-8177d8a220a9
+b7c226d3-d115-4bf8-b03c-e7f14eee2169
+91a75836-a7a6-488b-806a-e8f948c8eb46
+b30da379-97b3-4a94-b6a2-2064767d2e52
+befec357-cf20-4712-9805-34910608e2fd
+ca95130e-1c44-4733-a872-c0ce24d1b3c1
+\N
+f5001512-9140-43c9-a4c8-54e26f71f1cf
+a3f2283e-50dd-40b9-83f3-b50dca485209
+3f7ac41e-bc09-4b74-8665-bdde3ebe47a7
+761e1883-d06e-4360-9df5-5c5caeef905c
+98cffbaf-dc52-4674-ade0-f930a70b64b2
+370e189c-b821-41ae-b748-a60b6d7660eb
+12563667-28cd-478d-b53c-442f7bf12c67
+\N
+29fe6754-ba91-49b4-bfb7-12b3dc03081c
+87aa5cc1-3332-4bf3-a669-5bb61e56a7ee
+\N
+8bcd3587-03fd-44a0-aab8-b8aca2bc9eb9
+9ae7c0f8-6038-48af-b01f-0c5bccac6c8f
+\N
+3e98dbdb-b10f-4f6d-a87a-c8f1f2b1e22a
+cd53b5ef-38fa-4d68-a8ff-4eb07f4162b5
+514644a8-fe52-4bab-8bb8-4cb8c7ef7acc
+\N
+b01b6978-46f9-46ec-844d-1322be2cfcb2
+04675ff9-2d55-413d-a3d0-4ac0ff1d3a54
+a7ee0137-c56a-472c-b4cb-dc41f1177ef0
+bc41bfcd-e5f0-423d-82f7-ffb7da97b5dd
+55824064-db88-4077-ad90-945d878e88ba
+a30050dd-1a17-4659-b3a6-c4c182ff0184
+82ffa955-b664-4503-9b1c-095404dbff48
+91d67d53-ec53-4dae-95ca-da25c1cfae7d
+da144505-c151-42a4-b8d5-19ff810ae6ea
+cfa1ddf5-6149-4896-940e-5dc57e4ec766
+c3a56789-f97f-4d5a-b70a-7d24da43bc5f
+4547a150-68f8-4984-b7b4-67ee92315b6a
+6b6d5e01-b18d-4afd-8b6e-8c3af536efe4
+bf723c93-f506-4990-9e97-b65476044b30
+c3ec8969-1f70-4b19-977b-237ede99a6a5
+78f11bd7-7a10-42ae-9475-eb16ca80f1e0
+cb3bf2fe-2d6c-47e7-b1e7-ce3254d2f800
+3842e996-3d91-4cab-8cce-da007a08328b
+4c55e078-603b-4d4e-9c77-c747960f6aff
+16d9f806-448d-48a1-9473-4d30df05aab8
+4611148b-cf71-40e6-829f-95ae0f2c8094
+9f3bcfbc-24ea-4105-8cb8-37a0924ff5e2
+4fac2eaa-2bd1-4d9c-bd80-a7ae083efcf9
+dc5546ca-99fc-4c1d-9559-b2ed9cd3d2aa
+29721775-9930-4f6c-af20-bb5b5f8d0d73
+f39a6eac-e7d5-4124-9a65-9508bfc53920
+\N
+7bc9960c-cf4f-4cca-a752-b28c5805ae01
+0e1c03e3-2cca-4bc8-b160-d6c2e888c182
+b12bb0d6-45d3-4608-9992-be9804a09448
+31bc67c6-1293-47c0-9732-5094e0b996bc
+a262ce01-cdf9-47f4-8f48-e94d4b9d73ce
+bc7150a9-0593-444c-b7b7-cc142348f1b4
+2f1e9e36-7e1b-49a5-a83e-b330267b5051
+f919c11a-b74f-4543-9798-da31133f90b5
+8672777e-a462-4042-9604-4392bbbd3308
+37b53421-3c74-40f5-9884-b83033e9f596
+1f843010-c79a-4bd4-a0a5-0251e0389722
+c51ecb09-45b4-40ff-9934-877c168c5038
+131335ce-a059-49c4-81ae-c9d98211ff9c
+f467b40d-0c6e-40b9-9959-2f7f466f18e2
+44076ea8-1103-4086-9e7d-8bfb9a65893d
+79ca8799-36a4-4982-9cd7-bf93fca45d74
+82569d43-65a3-44d7-8836-2db6de03e6f7
+cb5380e0-b075-48e6-8a9a-eee854444d34
+db88f31c-ce62-43f3-9781-8a8404e6ba39
+78f33ab0-a744-4bf1-82a7-c0b319492607
+2e4b580e-7d69-42c8-9f1d-7f232a3ae74c
+427b3d53-2792-47d3-9d45-087b30568413
+40518971-9590-45fb-9219-242ab3053547
+fe49087f-d8c1-4769-b814-fd8bc1611b5c
+27f8a8ab-671e-4eb2-88b4-2ad41814df1c
+39ebe842-6c44-4fb1-a629-3f86323ca5ea
+4a341b56-3523-4163-8563-83b9db172673
+513dd3a8-7354-47e7-9eef-d2a9e59a0e18
+b8e38294-7be9-4c39-b80f-bf2c9acfe69f
+e1fa23c2-b0b6-47b3-82c9-eef6e930af08
+1b86903e-c395-42c7-b9ad-1a71a1fb52d1
+632161a2-474e-450c-9b70-0f09f512bcba
+73f00c2b-ea38-46bb-aae3-4cf205572baf
+013839fd-03de-4fe5-a08c-466670de6cbd
+5d951cfe-d988-4b69-bce8-37d66598cbc3
+4da7e8f1-edaf-404a-bd1b-e8dd3a838fe2
+1f2c1809-8b85-48e8-ada4-1fdb418fea0e
+\N
+1bde5bbb-5d63-4d00-b227-1a706315eaa1
+f7ebf8f8-609f-4ce9-b93c-54759305926e
+\N
+7c2dd991-9377-4001-8486-7f3c3a6bae9c
+9fe1e97c-718b-4cf2-b270-4e0b664aaf27
+2141a8f5-da01-47cc-8104-6dd28874d8ac
+304096e8-b118-41e0-8174-32dc8e1fc45d
+9d5fac3d-e6f2-4341-9e59-9a155bef7b17
+b42cebe1-f01f-4409-bfc2-150aa9f13159
+91adc8a2-266c-4196-99ff-1de1c361c3ef
+54d26aee-0309-4af7-9b12-bbb24eb3e4e1
+bd449351-c50a-43b2-9742-1bcb838d4d04
+9fe70798-e3bd-448a-b461-e462702a9aca
+\N
+c8ef8969-1332-481b-909a-340ff3fd4473
+64c68c64-f815-4bd7-b0aa-ba68bb15f611
+9f271158-ff4e-41a6-a883-913f2b36ae68
+\N
+b1082d66-0065-41ac-9bc5-dcea0bbec070
+\N
+3ac2d674-2e12-4db1-b998-2470cba43b11
+\N
+3061f573-96e9-4307-a683-df8ab30531a5
+01ce8c0e-7672-4023-be71-5dfae5ffa7d2
+06a9e327-29ea-4913-b6b9-90781484eff4
+9735f9eb-89b3-4f42-bfb5-e2bb208b640a
+21ef890c-1c8c-4890-8c6d-851eebe68f40
+c35686c4-cfcc-48ff-b6d9-7c8da68dceb1
+3f08e734-1f52-42b5-ba89-738582a7f5b4
+12975217-8a58-4a95-9ede-4ceb0a487a67
+97e186f8-28a7-4340-b781-cd13168daf99
+2336ce4b-3d57-46f4-b460-cdeb89c81fcd
+e824b114-66e0-441f-aa94-27feb7a3f672
+b8bf5230-0174-4f16-9470-dd476b9675d6
diff --git a/contrib/btree_gist/expected/uuid.out b/contrib/btree_gist/expected/uuid.out
new file mode 100644
index 0000000000..a34b024603
--- /dev/null
+++ b/contrib/btree_gist/expected/uuid.out
@@ -0,0 +1,66 @@
+-- uuid check
+CREATE TABLE uuidtmp (a uuid);
+\copy uuidtmp from 'data/uuid.data'
+SET enable_seqscan=on;
+SELECT count(*) FROM uuidtmp WHERE a < '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+ count
+-------
+ 227
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a <= '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+ count
+-------
+ 228
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a = '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+ count
+-------
+ 1
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a >= '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+ count
+-------
+ 376
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a > '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+ count
+-------
+ 375
+(1 row)
+
+CREATE INDEX uuididx ON uuidtmp USING gist ( a );
+SET enable_seqscan=off;
+SELECT count(*) FROM uuidtmp WHERE a < '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+ count
+-------
+ 227
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a <= '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+ count
+-------
+ 228
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a = '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+ count
+-------
+ 1
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a >= '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+ count
+-------
+ 376
+(1 row)
+
+SELECT count(*) FROM uuidtmp WHERE a > '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+ count
+-------
+ 375
+(1 row)
+
diff --git a/contrib/btree_gist/sql/uuid.sql b/contrib/btree_gist/sql/uuid.sql
new file mode 100644
index 0000000000..3f7ad764e2
--- /dev/null
+++ b/contrib/btree_gist/sql/uuid.sql
@@ -0,0 +1,31 @@
+-- uuid check
+
+CREATE TABLE uuidtmp (a uuid);
+
+\copy uuidtmp from 'data/uuid.data'
+
+SET enable_seqscan=on;
+
+SELECT count(*) FROM uuidtmp WHERE a < '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+
+SELECT count(*) FROM uuidtmp WHERE a <= '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+
+SELECT count(*) FROM uuidtmp WHERE a = '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+
+SELECT count(*) FROM uuidtmp WHERE a >= '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+
+SELECT count(*) FROM uuidtmp WHERE a > '55e65ca2-4136-4a4b-ba78-cd3fe4678203';
+
+CREATE INDEX uuididx ON uuidtmp USING gist ( a );
+
+SET enable_seqscan=off;
+
+SELECT count(*) FROM uuidtmp WHERE a < '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+
+SELECT count(*) FROM uuidtmp WHERE a <= '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+
+SELECT count(*) FROM uuidtmp WHERE a = '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+
+SELECT count(*) FROM uuidtmp WHERE a >= '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
+
+SELECT count(*) FROM uuidtmp WHERE a > '55e65ca2-4136-4a4b-ba78-cd3fe4678203'::uuid;
diff --git a/doc/src/sgml/btree-gist.sgml b/doc/src/sgml/btree-gist.sgml
index e8a5622704..d08647ce05 100644
--- a/doc/src/sgml/btree-gist.sgml
+++ b/doc/src/sgml/btree-gist.sgml
@@ -16,7 +16,8 @@
time without time zone>, date>, interval>,
oid>, money>, char>,
varchar>, text>, bytea>, bit>,
- varbit>, macaddr>, inet>, and cidr>.
+ varbit>, macaddr>, inet>, cidr>,
+ and uuid>.
@@ -99,8 +100,9 @@ INSERT 0 1
Teodor Sigaev (teodor@stack.net),
- Oleg Bartunov (oleg@sai.msu.su), and
- Janko Richter (jankorichter@yahoo.de). See
+ Oleg Bartunov (oleg@sai.msu.su),
+ Janko Richter (jankorichter@yahoo.de), and
+ Paul Jungwirth (pj@illuminatedcomputing.com). See
for additional information.
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
index c59e74da3d..bea630e792 100644
--- a/src/backend/utils/adt/uuid.c
+++ b/src/backend/utils/adt/uuid.c
@@ -22,15 +22,6 @@
#include "utils/sortsupport.h"
#include "utils/uuid.h"
-/* uuid size in bytes */
-#define UUID_LEN 16
-
-/* pg_uuid_t is declared to be struct pg_uuid_t in uuid.h */
-struct pg_uuid_t
-{
- unsigned char data[UUID_LEN];
-};
-
/* sortsupport for uuid */
typedef struct
{
diff --git a/src/include/utils/uuid.h b/src/include/utils/uuid.h
index 897382f2e5..c6766805e0 100644
--- a/src/include/utils/uuid.h
+++ b/src/include/utils/uuid.h
@@ -14,11 +14,13 @@
#ifndef UUID_H
#define UUID_H
-/* guid size in bytes */
+/* uuid size in bytes */
#define UUID_LEN 16
-/* opaque struct; defined in uuid.c */
-typedef struct pg_uuid_t pg_uuid_t;
+typedef struct pg_uuid_t
+{
+ unsigned char data[UUID_LEN];
+} pg_uuid_t;
/* fmgr interface macros */
#define UUIDPGetDatum(X) PointerGetDatum(X)
--
cgit v1.2.3
From 2f0c7ff48b68b6fb6dc373ffcebd99343a9f6451 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 30 Nov 2016 12:00:00 -0500
Subject: doc: Fix typo
From: Tsunakawa, Takayuki
---
doc/src/sgml/config.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d8d207e5eb..bcfe2fded2 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1302,7 +1302,7 @@ include_dir 'conf.d'
If you have a dedicated database server with 1GB or more of RAM, a
reasonable starting value for shared_buffers is 25%
of the memory in your system. There are some workloads where even
- large settings for shared_buffers are effective, but
+ larger settings for shared_buffers are effective, but
because PostgreSQL also relies on the
operating system cache, it is unlikely that an allocation of more than
40% of RAM to shared_buffers will work better than a
--
cgit v1.2.3
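As a hedged sketch of this guidance (the 2GB figure assumes a dedicated server with 8GB of RAM and is illustrative, not from the patch):

SHOW shared_buffers;                      -- e.g. 128MB, the compiled-in default
ALTER SYSTEM SET shared_buffers = '2GB';  -- roughly 25% of RAM on an 8GB box
-- shared_buffers can only be set at server start, so restart to apply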
From 81c52728f82be5303ea16508255e948017f4cd87 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 30 Nov 2016 12:00:00 -0500
Subject: doc: Remove claim about large shared_buffers on Windows
Testing has shown that it is no longer correct.
From: Tsunakawa, Takayuki
Reviewed-by: amul sul
Discussion: http://www.postgresql.org/message-id/flat/0A3221C70F24FB45833433255569204D1F5EE995@G01JPEXMBYT05/
---
doc/src/sgml/config.sgml | 5 -----
1 file changed, 5 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index bcfe2fded2..b1c5289fcc 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1316,11 +1316,6 @@ include_dir 'conf.d'
On systems with less than 1GB of RAM, a smaller percentage of RAM is
appropriate, so as to leave adequate space for the operating system.
- Also, on Windows, large values for shared_buffers
- aren't as effective. You may find better results keeping the setting
- relatively low and using the operating system cache more instead. The
- useful range for shared_buffers on Windows systems
- is generally from 64MB to 512MB.
--
cgit v1.2.3
From 213c0f2d7880f78c710127920cf4bf7017e0fa57 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 30 Nov 2016 13:34:13 -0500
Subject: Doc: improve description of trim() and related functions.
Per bug #14441 from Mark Pether, the documentation could be misread,
mainly because some of the examples failed to show what happens with
a multicharacter "characters to trim" string. Also, while the text
description in most of these entries was fairly clear that the
"characters" argument is a set of characters not a substring to match,
some of them used variant wording that was a bit less clear.
trim() itself suffered from both deficiencies and was thus pretty
misinterpretable.
Also fix failure to explain which of LEADING/TRAILING/BOTH is the
default.
Discussion: https://postgr.es/m/20161130011710.6539.53657@wrigleys.postgresql.org
---
doc/src/sgml/func.sgml | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2e64cc430c..eca98dfd34 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1534,11 +1534,12 @@
text
- Remove the longest string containing only the
+ Remove the longest string containing only characters from
characters (a space by default) from the
- start/end/both ends of the string
+ start, end, or both ends (both> is the default)
+ of string
- trim(both 'x' from 'xTomxx')
+ trim(both 'xyz' from 'yxTomxx')Tom
@@ -1547,14 +1548,14 @@
trim(leading | trailing
| bothfromstring
- , characters
+ , characters
)text
- Non-standard version of trim()>
+ Non-standard syntax for trim()>
- trim(both from 'xTomxx', 'x')
+ trim(both from 'yxTomxx', 'xyz')Tom
@@ -1626,7 +1627,7 @@
in characters (a space by default)
from the start and end of string
- btrim('xyxtrimyyx', 'xy')
+ btrim('xyxtrimyyx', 'xyz')trim
@@ -1895,8 +1896,8 @@
characters (a space by default) from the start of
string
- ltrim('zzzytrim', 'xyz')
- trim
+ ltrim('zzzytest', 'xyz')
+ test
@@ -2218,8 +2219,8 @@
characters (a space by default) from the end of
string
- rtrim('trimxxxx', 'x')
- trim
+ rtrim('testxxzx', 'xyz')
+ test
@@ -3484,11 +3485,11 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
bytea
- Remove the longest string containing only the bytes in
+ Remove the longest string containing only bytes appearing in
bytes from the start
and end of string
- trim(E'\\000'::bytea from E'\\000Tom\\000'::bytea)
+ trim(E'\\000\\001'::bytea from E'\\000Tom\\001'::bytea)Tom
@@ -3527,11 +3528,11 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
bytea
- Remove the longest string consisting only of bytes
- in bytes from the start and end of
+ Remove the longest string containing only bytes appearing in
+ bytes from the start and end of
string
- btrim(E'\\000trim\\000'::bytea, E'\\000'::bytea)
+ btrim(E'\\000trim\\001'::bytea, E'\\000\\001'::bytea)trim
--
cgit v1.2.3
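The corrected wording turns on the second argument being a set of characters rather than a substring; the patched examples check out as plain queries (results shown as comments):

SELECT trim(both 'xyz' from 'yxTomxx');   -- Tom
SELECT ltrim('zzzytest', 'xyz');          -- test
SELECT rtrim('testxxzx', 'xyz');          -- test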
From a01a5013d9fa223a2e1b8ec1c6c6c2a47a6b808a Mon Sep 17 00:00:00 2001
From: Michael Meskes
Date: Thu, 1 Dec 2016 12:26:50 +0100
Subject: Added missing "EXEC SQL" to statement.
---
doc/src/sgml/ecpg.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ecpg.sgml b/doc/src/sgml/ecpg.sgml
index a30e25cfa0..b8021cbe5b 100644
--- a/doc/src/sgml/ecpg.sgml
+++ b/doc/src/sgml/ecpg.sgml
@@ -517,7 +517,7 @@ EXEC SQL COMMIT;
- SET AUTOCOMMIT TO OFF
+ EXEC SQL SET AUTOCOMMIT TO OFF
Disable autocommit mode. This is the default.
--
cgit v1.2.3
From b460f5d6693103076dc554aa7cbb96e1e53074f9 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Fri, 2 Dec 2016 07:42:58 -0500
Subject: Add max_parallel_workers GUC.
Increase the default value of the existing max_worker_processes GUC
from 8 to 16, and add a new max_parallel_workers GUC with a maximum
of 8. This way, even if the maximum amount of parallel query is
happening, there is still room for background workers that do other
things, as originally envisioned when max_worker_processes was added.
Julien Rouhaud, reviewed by Amit Kapila and revised by me.
---
doc/src/sgml/config.sgml | 23 ++++++++++++--
src/backend/access/transam/parallel.c | 3 +-
src/backend/postmaster/bgworker.c | 45 ++++++++++++++++++++++++++-
src/backend/utils/init/globals.c | 3 +-
src/backend/utils/misc/guc.c | 12 ++++++-
src/backend/utils/misc/postgresql.conf.sample | 3 +-
src/bin/pg_resetxlog/pg_resetxlog.c | 4 +--
src/include/miscadmin.h | 1 +
src/include/postmaster/bgworker.h | 9 ++++++
9 files changed, 93 insertions(+), 10 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b1c5289fcc..fdf8b3e9c7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1982,7 +1982,7 @@ include_dir 'conf.d'
Sets the maximum number of background processes that the system
can support. This parameter can only be set at server start. The
- default is 8.
+ default is 16.
@@ -2004,8 +2004,9 @@ include_dir 'conf.d'
Sets the maximum number of workers that can be started by a single
Gather node. Parallel workers are taken from the
pool of processes established by
- . Note that the requested
- number of workers may not actually be available at run time. If this
+ , limited by
+ . Note that the requested
+ number of workers may not actually be available at runtime. If this
occurs, the plan will run with fewer workers than expected, which may
be inefficient. The default value is 2. Setting this value to 0
disables parallel query execution.
@@ -2034,6 +2035,22 @@ include_dir 'conf.d'
+
+ max_parallel_workers (integer)
+
+ max_parallel_workers> configuration parameter
+
+
+
+
+ Sets the maximum number of workers that the system can support for
+ parallel queries. The default value is 8. When increasing or
+ decreasing this value, consider also adjusting
+ .
+
+
+
+
backend_flush_after (integer)
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 59dc3949d8..1c32fcd046 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -454,7 +454,8 @@ LaunchParallelWorkers(ParallelContext *pcxt)
snprintf(worker.bgw_name, BGW_MAXLEN, "parallel worker for PID %d",
MyProcPid);
worker.bgw_flags =
- BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
+ BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION
+ | BGWORKER_CLASS_PARALLEL;
worker.bgw_start_time = BgWorkerStart_ConsistentState;
worker.bgw_restart_time = BGW_NEVER_RESTART;
worker.bgw_main = ParallelWorkerMain;
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 92495850db..13a6e23a14 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -80,9 +80,22 @@ typedef struct BackgroundWorkerSlot
BackgroundWorker worker;
} BackgroundWorkerSlot;
+/*
+ * In order to limit the total number of parallel workers (according to
+ * max_parallel_workers GUC), we maintain the number of active parallel
+ * workers. Since the postmaster cannot take locks, two variables are used for
+ * this purpose: the number of registered parallel workers (modified by the
+ * backends, protected by BackgroundWorkerLock) and the number of terminated
+ * parallel workers (modified only by the postmaster, lockless). The active
+ * number of parallel workers is the number of registered workers minus the
+ * terminated ones. These counters can of course overflow, but it's not
+ * important here since the subtraction will still give the right number.
+ */
typedef struct BackgroundWorkerArray
{
int total_slots;
+ uint32 parallel_register_count;
+ uint32 parallel_terminate_count;
BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER];
} BackgroundWorkerArray;
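Since both counters in the struct above are uint32, the subtraction described in the comment is modular and stays correct across wraparound. A sketch of the arithmetic with illustrative values (register_count wrapped around to 2, terminate_count at 4294967294):

SELECT mod(2::numeric - 4294967294::numeric + 4294967296::numeric,
           4294967296::numeric);          -- 4 active workers, despite the wrap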
@@ -127,6 +140,8 @@ BackgroundWorkerShmemInit(void)
int slotno = 0;
BackgroundWorkerData->total_slots = max_worker_processes;
+ BackgroundWorkerData->parallel_register_count = 0;
+ BackgroundWorkerData->parallel_terminate_count = 0;
/*
* Copy contents of worker list into shared memory. Record the shared
@@ -267,9 +282,12 @@ BackgroundWorkerStateChange(void)
/*
* We need a memory barrier here to make sure that the load of
- * bgw_notify_pid completes before the store to in_use.
+ * bgw_notify_pid and the update of parallel_terminate_count
+ * complete before the store to in_use.
*/
notify_pid = slot->worker.bgw_notify_pid;
+ if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
+ BackgroundWorkerData->parallel_terminate_count++;
pg_memory_barrier();
slot->pid = 0;
slot->in_use = false;
@@ -370,6 +388,9 @@ ForgetBackgroundWorker(slist_mutable_iter *cur)
Assert(rw->rw_shmem_slot < max_worker_processes);
slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
+ if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
+ BackgroundWorkerData->parallel_terminate_count++;
+
slot->in_use = false;
ereport(DEBUG1,
@@ -824,6 +845,7 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
{
int slotno;
bool success = false;
+ bool parallel;
uint64 generation = 0;
/*
@@ -840,8 +862,27 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
if (!SanityCheckBackgroundWorker(worker, ERROR))
return false;
+ parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;
+
LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
+ /*
+ * If this is a parallel worker, check whether there are already too many
+ * parallel workers; if so, don't register another one. Our view of
+ * parallel_terminate_count may be slightly stale, but that doesn't really
+ * matter: we would have gotten the same result if we'd arrived here
+ * slightly earlier anyway. There's no help for it, either, since the
+ * postmaster must not take locks; a memory barrier wouldn't guarantee
+ * anything useful.
+ */
+ if (parallel && (BackgroundWorkerData->parallel_register_count -
+ BackgroundWorkerData->parallel_terminate_count) >=
+ max_parallel_workers)
+ {
+ LWLockRelease(BackgroundWorkerLock);
+ return false;
+ }
+
/*
* Look for an unused slot. If we find one, grab it.
*/
@@ -856,6 +897,8 @@ RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
slot->generation++;
slot->terminate = false;
generation = slot->generation;
+ if (parallel)
+ BackgroundWorkerData->parallel_register_count++;
/*
* Make sure postmaster doesn't see the slot as in use before it
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index f23208353c..c564ae396d 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -121,7 +121,8 @@ int replacement_sort_tuples = 150000;
*/
int NBuffers = 1000;
int MaxConnections = 90;
-int max_worker_processes = 8;
+int max_worker_processes = 16;
+int max_parallel_workers = 8;
int MaxBackends = 0;
int VacuumCostPageHit = 1; /* GUC parameters for vacuum */
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 28ebcb6f3f..4e49d5b79c 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2477,7 +2477,7 @@ static struct config_int ConfigureNamesInt[] =
NULL,
},
&max_worker_processes,
- 8, 0, MAX_BACKENDS,
+ 16, 0, MAX_BACKENDS,
check_max_worker_processes, NULL, NULL
},
@@ -2664,6 +2664,16 @@ static struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ {
+ {"max_parallel_workers", PGC_USERSET, RESOURCES_ASYNCHRONOUS,
+ gettext_noop("Sets the maximum number of parallel workers that can be active at one time."),
+ NULL
+ },
+ &max_parallel_workers,
+ 8, 0, 1024,
+ NULL, NULL, NULL
+ },
+
{
{"autovacuum_work_mem", PGC_SIGHUP, RESOURCES_MEM,
gettext_noop("Sets the maximum memory to be used by each autovacuum worker process."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0df15380a9..1f490c7de4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -161,8 +161,9 @@
# - Asynchronous Behavior -
#effective_io_concurrency = 1 # 1-1000; 0 disables prefetching
-#max_worker_processes = 8 # (change requires restart)
+#max_worker_processes = 16 # (change requires restart)
#max_parallel_workers_per_gather = 2 # taken from max_worker_processes
+#max_parallel_workers = 8 # total maximum number of parallel worker processes
#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
# (change requires restart)
#backend_flush_after = 0 # measured in pages, 0 disables
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 2b76f64079..1beee7f8eb 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -584,7 +584,7 @@ GuessControlValues(void)
ControlFile.wal_log_hints = false;
ControlFile.track_commit_timestamp = false;
ControlFile.MaxConnections = 100;
- ControlFile.max_worker_processes = 8;
+ ControlFile.max_worker_processes = 16;
ControlFile.max_prepared_xacts = 0;
ControlFile.max_locks_per_xact = 64;
@@ -800,7 +800,7 @@ RewriteControlFile(void)
ControlFile.wal_log_hints = false;
ControlFile.track_commit_timestamp = false;
ControlFile.MaxConnections = 100;
- ControlFile.max_worker_processes = 8;
+ ControlFile.max_worker_processes = 16;
ControlFile.max_prepared_xacts = 0;
ControlFile.max_locks_per_xact = 64;
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 78c9954365..d06eca54b4 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -157,6 +157,7 @@ extern PGDLLIMPORT int NBuffers;
extern int MaxBackends;
extern int MaxConnections;
extern int max_worker_processes;
+extern int max_parallel_workers;
extern PGDLLIMPORT int MyProcPid;
extern PGDLLIMPORT pg_time_t MyStartTime;
diff --git a/src/include/postmaster/bgworker.h b/src/include/postmaster/bgworker.h
index b6889a3320..b46d5818e7 100644
--- a/src/include/postmaster/bgworker.h
+++ b/src/include/postmaster/bgworker.h
@@ -58,6 +58,15 @@
*/
#define BGWORKER_BACKEND_DATABASE_CONNECTION 0x0002
+/*
+ * This class is used internally for parallel queries, to keep track of the
+ * number of active parallel workers and make sure we never launch more than
+ * max_parallel_workers parallel workers at the same time. Third party
+ * background workers should not use this class.
+ */
+#define BGWORKER_CLASS_PARALLEL 0x0010
+/* add additional bgworker classes here */
+
typedef void (*bgworker_main_type) (Datum main_arg);
--
cgit v1.2.3
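A short sketch of how the three GUCs in this patch relate (values are the new defaults; the session is illustrative):

SHOW max_worker_processes;                -- 16; changing it requires a restart
SHOW max_parallel_workers;                -- 8; the pool available to parallel query
SET max_parallel_workers_per_gather = 4;  -- per-Gather request, still capped by the pool
-- once 8 parallel workers are active, further registrations fail and
-- plans simply run with fewer workers than requested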
From e63d41498837667a4e2e0a4b9416bfda28c722d6 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Fri, 2 Dec 2016 08:58:41 -0500
Subject: Clarify that pg_stat_activity.query has a length limit.
There was always documentation of the GUC that controlled what the
limit actually was, but previously the documentation of the field
itself made no mention of that limit.
Ian Barwick
---
doc/src/sgml/monitoring.sgml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 3de489e2f0..02dab879d9 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -785,7 +785,9 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
Text of this backend's most recent query. If
state> is active> this field shows the
currently executing query. In all other states, it shows the last query
- that was executed.
+ that was executed. By default the query text is truncated at 1024
+ characters; this value can be changed via the parameter
+ .
--
cgit v1.2.3
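In current releases the controlling parameter is track_activity_query_size (1024 by default); a quick way to see the truncation at work:

SHOW track_activity_query_size;           -- 1024
SELECT pid, state, left(query, 40) AS query_head
FROM pg_stat_activity;                    -- query is truncated server-side at that size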
From b3427dade14cc31eb48740bc9ea98b5954470b24 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 2 Dec 2016 14:57:35 -0500
Subject: Delete deleteWhatDependsOn() in favor of more performDeletion() flag
bits.
deleteWhatDependsOn() had grown an uncomfortably large number of
assumptions about what it's used for. There are actually only two minor
differences between what it does and what a regular performDeletion() call
can do, so let's invent additional bits in performDeletion's existing flags
argument that specify those behaviors, and get rid of deleteWhatDependsOn()
as such. (We'd probably have done it this way from the start, except that
performDeletion didn't originally have a flags argument, IIRC.)
Also, add a SKIP_EXTENSIONS flag bit that prevents ever recursing to an
extension, and use that when dropping temporary objects at session end.
This provides a more general solution to the problem addressed in a hacky
way in commit 08dd23cec: if an extension script creates temp objects and
forgets to remove them again, the whole extension went away when its
contained temp objects were deleted. The previous solution only covered
temp relations, but this solves it for all object types.
These changes require minor additions in dependency.c to pass the flags
to subroutines that previously didn't get them, but it's still a net
savings of code, and it seems cleaner than before.
Having done this, revert the special-case code added in 08dd23cec that
prevented addition of pg_depend records for temp table extension
membership, because that caused its own oddities: dropping an extension
that had created such a table didn't automatically remove the table,
leading to a failure if the table had another dependency on the extension
(such as use of an extension data type), or to a duplicate-name failure if
you then tried to recreate the extension. But we keep the part that
prevents the pg_temp_nnn schema from becoming an extension member; we never
want that to happen. Add a regression test case covering these behaviors.
Although this fixes some arguable bugs, we've heard few field complaints,
and any such problems are easily worked around by explicitly dropping temp
objects at the end of extension scripts (which seems like good practice
anyway). So I won't risk a back-patch.
Discussion: https://postgr.es/m/e51f4311-f483-4dd0-1ccc-abec3c405110@BlueTreble.com
---
doc/src/sgml/extend.sgml | 12 +-
src/backend/catalog/dependency.c | 174 ++++++++-------------
src/backend/catalog/heap.c | 7 +-
src/backend/catalog/namespace.c | 11 +-
src/backend/postmaster/autovacuum.c | 5 +-
src/include/catalog/dependency.h | 13 +-
src/include/commands/extension.h | 11 +-
src/test/modules/test_extensions/Makefile | 4 +-
.../test_extensions/expected/test_extensions.out | 58 +++++++
.../test_extensions/sql/test_extensions.sql | 39 +++++
.../modules/test_extensions/test_ext8--1.0.sql | 21 +++
src/test/modules/test_extensions/test_ext8.control | 4 +
12 files changed, 229 insertions(+), 130 deletions(-)
create mode 100644 src/test/modules/test_extensions/test_ext8--1.0.sql
create mode 100644 src/test/modules/test_extensions/test_ext8.control
(limited to 'doc/src')
diff --git a/doc/src/sgml/extend.sgml b/doc/src/sgml/extend.sgml
index f9d91a3923..c4f211bc02 100644
--- a/doc/src/sgml/extend.sgml
+++ b/doc/src/sgml/extend.sgml
@@ -390,6 +390,15 @@
schema(s) its member objects are within.
+
+ If an extension's script creates any temporary objects (such as temp
+ tables), those objects are treated as extension members for the
+ remainder of the current session, but are automatically dropped at
+ session end, as any temporary object would be. This is an exception
+ to the rule that extension member objects cannot be dropped without
+ dropping the whole extension.
+
+
Extension Files
@@ -803,7 +812,8 @@ SELECT pg_catalog.pg_extension_config_dump('my_config', 'WHERE NOT standard_entr
environment that CREATE EXTENSION> provides for installation
scripts: in particular, search_path> is set up in the same
way, and any new objects created by the script are automatically added
- to the extension.
+ to the extension. Also, if the script chooses to drop extension member
+ objects, they are automatically dissociated from the extension.
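A tiny sketch of the documented behavior (the extension and table names are hypothetical):

-- inside the script run by CREATE EXTENSION my_ext:
CREATE TEMP TABLE scratch (x int);  -- becomes a member of my_ext for this session only
DROP TABLE scratch;                 -- good practice per the commit message; if omitted,
                                    -- the table (not the extension) goes away at session end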
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index f71d80fc1a..b697e88ef0 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -168,6 +168,7 @@ static const Oid object_classes[] = {
static void findDependentObjects(const ObjectAddress *object,
+ int objflags,
int flags,
ObjectAddressStack *stack,
ObjectAddresses *targetObjects,
@@ -175,7 +176,7 @@ static void findDependentObjects(const ObjectAddress *object,
Relation *depRel);
static void reportDependentObjects(const ObjectAddresses *targetObjects,
DropBehavior behavior,
- int msglevel,
+ int flags,
const ObjectAddress *origObject);
static void deleteOneObject(const ObjectAddress *object,
Relation *depRel, int32 flags);
@@ -237,11 +238,17 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
}
/*
- * Delete all the objects in the proper order.
+ * Delete all the objects in the proper order, except that if told to, we
+ * should skip the original object(s).
*/
for (i = 0; i < targetObjects->numrefs; i++)
{
ObjectAddress *thisobj = targetObjects->refs + i;
+ ObjectAddressExtra *thisextra = targetObjects->extras + i;
+
+ if ((flags & PERFORM_DELETION_SKIP_ORIGINAL) &&
+ (thisextra->flags & DEPFLAG_ORIGINAL))
+ continue;
deleteOneObject(thisobj, depRel, flags);
}
@@ -255,16 +262,32 @@ deleteObjectsInList(ObjectAddresses *targetObjects, Relation *depRel,
* according to the dependency type.
*
* This is the outer control routine for all forms of DROP that drop objects
- * that can participate in dependencies. Note that the next two routines
- * are variants on the same theme; if you change anything here you'll likely
- * need to fix them too.
+ * that can participate in dependencies. Note that performMultipleDeletions
+ * is a variant on the same theme; if you change anything here you'll likely
+ * need to fix that too.
+ *
+ * Bits in the flags argument can include:
+ *
+ * PERFORM_DELETION_INTERNAL: indicates that the drop operation is not the
+ * direct result of a user-initiated action. For example, when a temporary
+ * schema is cleaned out so that a new backend can use it, or when a column
+ * default is dropped as an intermediate step while adding a new one, that's
+ * an internal operation. On the other hand, when we drop something because
+ * the user issued a DROP statement against it, that's not internal. Currently
+ * this suppresses calling event triggers and making some permissions checks.
*
- * flags should include PERFORM_DELETION_INTERNAL when the drop operation is
- * not the direct result of a user-initiated action. For example, when a
- * temporary schema is cleaned out so that a new backend can use it, or when
- * a column default is dropped as an intermediate step while adding a new one,
- * that's an internal operation. On the other hand, when we drop something
- * because the user issued a DROP statement against it, that's not internal.
+ * PERFORM_DELETION_CONCURRENTLY: perform the drop concurrently. This does
+ * not currently work for anything except dropping indexes; don't set it for
+ * other object types or you may get strange results.
+ *
+ * PERFORM_DELETION_QUIETLY: reduce message level from NOTICE to DEBUG2.
+ *
+ * PERFORM_DELETION_SKIP_ORIGINAL: do not delete the specified object(s),
+ * but only what depends on it/them.
+ *
+ * PERFORM_DELETION_SKIP_EXTENSIONS: do not delete extensions, even when
+ * deleting objects that are part of an extension. This should generally
+ * be used only when dropping temporary objects.
*/
void
performDeletion(const ObjectAddress *object,
@@ -293,6 +316,7 @@ performDeletion(const ObjectAddress *object,
findDependentObjects(object,
DEPFLAG_ORIGINAL,
+ flags,
NULL, /* empty stack */
targetObjects,
NULL, /* no pendingObjects */
@@ -303,7 +327,7 @@ performDeletion(const ObjectAddress *object,
*/
reportDependentObjects(targetObjects,
behavior,
- NOTICE,
+ flags,
object);
/* do the deed */
@@ -364,6 +388,7 @@ performMultipleDeletions(const ObjectAddresses *objects,
findDependentObjects(thisobj,
DEPFLAG_ORIGINAL,
+ flags,
NULL, /* empty stack */
targetObjects,
objects,
@@ -378,7 +403,7 @@ performMultipleDeletions(const ObjectAddresses *objects,
*/
reportDependentObjects(targetObjects,
behavior,
- NOTICE,
+ flags,
(objects->numrefs == 1 ? objects->refs : NULL));
/* do the deed */
@@ -390,88 +415,6 @@ performMultipleDeletions(const ObjectAddresses *objects,
heap_close(depRel, RowExclusiveLock);
}
-/*
- * deleteWhatDependsOn: attempt to drop everything that depends on the
- * specified object, though not the object itself. Behavior is always
- * CASCADE.
- *
- * This is currently used only to clean out the contents of a schema
- * (namespace): the passed object is a namespace. We normally want this
- * to be done silently, so there's an option to suppress NOTICE messages.
- *
- * Note we don't fire object drop event triggers here; it would be wrong to do
- * so for the current only use of this function, but if more callers are added
- * this might need to be reconsidered.
- */
-void
-deleteWhatDependsOn(const ObjectAddress *object,
- bool showNotices)
-{
- Relation depRel;
- ObjectAddresses *targetObjects;
- int i;
-
- /*
- * We save some cycles by opening pg_depend just once and passing the
- * Relation pointer down to all the recursive deletion steps.
- */
- depRel = heap_open(DependRelationId, RowExclusiveLock);
-
- /*
- * Acquire deletion lock on the target object. (Ideally the caller has
- * done this already, but many places are sloppy about it.)
- */
- AcquireDeletionLock(object, 0);
-
- /*
- * Construct a list of objects to delete (ie, the given object plus
- * everything directly or indirectly dependent on it).
- */
- targetObjects = new_object_addresses();
-
- findDependentObjects(object,
- DEPFLAG_ORIGINAL,
- NULL, /* empty stack */
- targetObjects,
- NULL, /* no pendingObjects */
- &depRel);
-
- /*
- * Check if deletion is allowed, and report about cascaded deletes.
- */
- reportDependentObjects(targetObjects,
- DROP_CASCADE,
- showNotices ? NOTICE : DEBUG2,
- object);
-
- /*
- * Delete all the objects in the proper order, except we skip the original
- * object.
- */
- for (i = 0; i < targetObjects->numrefs; i++)
- {
- ObjectAddress *thisobj = targetObjects->refs + i;
- ObjectAddressExtra *thisextra = targetObjects->extras + i;
-
- if (thisextra->flags & DEPFLAG_ORIGINAL)
- continue;
-
- /*
- * Since this function is currently only used to clean out temporary
- * schemas, we pass PERFORM_DELETION_INTERNAL here, indicating that
- * the operation is an automatic system operation rather than a user
- * action. If, in the future, this function is used for other
- * purposes, we might need to revisit this.
- */
- deleteOneObject(thisobj, &depRel, PERFORM_DELETION_INTERNAL);
- }
-
- /* And clean up */
- free_object_addresses(targetObjects);
-
- heap_close(depRel, RowExclusiveLock);
-}
-
/*
* findDependentObjects - find all objects that depend on 'object'
*
@@ -492,16 +435,22 @@ deleteWhatDependsOn(const ObjectAddress *object,
* its sub-objects too.
*
* object: the object to add to targetObjects and find dependencies on
- * flags: flags to be ORed into the object's targetObjects entry
+ * objflags: flags to be ORed into the object's targetObjects entry
+ * flags: PERFORM_DELETION_xxx flags for the deletion operation as a whole
* stack: list of objects being visited in current recursion; topmost item
* is the object that we recursed from (NULL for external callers)
* targetObjects: list of objects that are scheduled to be deleted
* pendingObjects: list of other objects slated for destruction, but
* not necessarily in targetObjects yet (can be NULL if none)
* *depRel: already opened pg_depend relation
+ *
+ * Note: objflags describes the reason for visiting this particular object
+ * at this time, and is not passed down when recursing. The flags argument
+ * is passed down, since it describes what we're doing overall.
*/
static void
findDependentObjects(const ObjectAddress *object,
+ int objflags,
int flags,
ObjectAddressStack *stack,
ObjectAddresses *targetObjects,
@@ -518,8 +467,8 @@ findDependentObjects(const ObjectAddress *object,
/*
* If the target object is already being visited in an outer recursion
- * level, just report the current flags back to that level and exit. This
- * is needed to avoid infinite recursion in the face of circular
+ * level, just report the current objflags back to that level and exit.
+ * This is needed to avoid infinite recursion in the face of circular
* dependencies.
*
* The stack check alone would result in dependency loops being broken at
@@ -532,19 +481,19 @@ findDependentObjects(const ObjectAddress *object,
* auto dependency, too, if we had to. However there are no known cases
* where that would be necessary.
*/
- if (stack_address_present_add_flags(object, flags, stack))
+ if (stack_address_present_add_flags(object, objflags, stack))
return;
/*
* It's also possible that the target object has already been completely
* processed and put into targetObjects. If so, again we just add the
- * specified flags to its entry and return.
+ * specified objflags to its entry and return.
*
* (Note: in these early-exit cases we could release the caller-taken
* lock, since the object is presumably now locked multiple times; but it
* seems not worth the cycles.)
*/
- if (object_address_present_add_flags(object, flags, targetObjects))
+ if (object_address_present_add_flags(object, objflags, targetObjects))
return;
/*
@@ -597,6 +546,15 @@ findDependentObjects(const ObjectAddress *object,
case DEPENDENCY_EXTENSION:
+ /*
+ * If told to, ignore EXTENSION dependencies altogether. This
+ * flag is normally used to prevent dropping extensions during
+ * temporary-object cleanup, even if a temp object was created
+ * during an extension script.
+ */
+ if (flags & PERFORM_DELETION_SKIP_EXTENSIONS)
+ break;
+
/*
* If the other object is the extension currently being
* created/altered, ignore this dependency and continue with
@@ -699,6 +657,7 @@ findDependentObjects(const ObjectAddress *object,
*/
findDependentObjects(&otherObject,
DEPFLAG_REVERSE,
+ flags,
stack,
targetObjects,
pendingObjects,
@@ -729,7 +688,7 @@ findDependentObjects(const ObjectAddress *object,
* they have to be deleted before the current object.
*/
mystack.object = object; /* set up a new stack level */
- mystack.flags = flags;
+ mystack.flags = objflags;
mystack.next = stack;
ScanKeyInit(&key[0],
@@ -783,7 +742,7 @@ findDependentObjects(const ObjectAddress *object,
continue;
}
- /* Recurse, passing flags indicating the dependency type */
+ /* Recurse, passing objflags indicating the dependency type */
switch (foundDep->deptype)
{
case DEPENDENCY_NORMAL:
@@ -820,6 +779,7 @@ findDependentObjects(const ObjectAddress *object,
findDependentObjects(&otherObject,
subflags,
+ flags,
&mystack,
targetObjects,
pendingObjects,
@@ -850,16 +810,17 @@ findDependentObjects(const ObjectAddress *object,
*
* targetObjects: list of objects that are scheduled to be deleted
* behavior: RESTRICT or CASCADE
- * msglevel: elog level for non-error report messages
+ * flags: other flags for the deletion operation
* origObject: base object of deletion, or NULL if not available
* (the latter case occurs in DROP OWNED)
*/
static void
reportDependentObjects(const ObjectAddresses *targetObjects,
DropBehavior behavior,
- int msglevel,
+ int flags,
const ObjectAddress *origObject)
{
+ int msglevel = (flags & PERFORM_DELETION_QUIETLY) ? DEBUG2 : NOTICE;
bool ok = true;
StringInfoData clientdetail;
StringInfoData logdetail;
@@ -1140,8 +1101,7 @@ doDeletion(const ObjectAddress *object, int flags)
if (relKind == RELKIND_INDEX)
{
- bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY)
- == PERFORM_DELETION_CONCURRENTLY);
+ bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) != 0);
Assert(object->objectSubId == 0);
index_drop(object->objectId, concurrent);
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 0cf7b9eb62..0b804e7ac6 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1285,10 +1285,6 @@ heap_create_with_catalog(const char *relname,
* should they have any ACL entries. The same applies for extension
* dependencies.
*
- * If it's a temp table, we do not make it an extension member; this
- * prevents the unintuitive result that deletion of the temp table at
- * session end would make the whole extension go away.
- *
* Also, skip this in bootstrap mode, since we don't make dependencies
* while bootstrapping.
*/
@@ -1309,8 +1305,7 @@ heap_create_with_catalog(const char *relname,
recordDependencyOnOwner(RelationRelationId, relid, ownerid);
- if (relpersistence != RELPERSISTENCE_TEMP)
- recordDependencyOnCurrentExtension(&myself, false);
+ recordDependencyOnCurrentExtension(&myself, false);
if (reloftypeid)
{
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 8fd4c3136b..e3cfe22759 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -3872,14 +3872,19 @@ RemoveTempRelations(Oid tempNamespaceId)
/*
* We want to get rid of everything in the target namespace, but not the
* namespace itself (deleting it only to recreate it later would be a
- * waste of cycles). We do this by finding everything that has a
- * dependency on the namespace.
+ * waste of cycles). Hence, specify SKIP_ORIGINAL. It's also an INTERNAL
+ * deletion, and we don't want to drop any extensions that might happen to
+ * own temp objects.
*/
object.classId = NamespaceRelationId;
object.objectId = tempNamespaceId;
object.objectSubId = 0;
- deleteWhatDependsOn(&object, false);
+ performDeletion(&object, DROP_CASCADE,
+ PERFORM_DELETION_INTERNAL |
+ PERFORM_DELETION_QUIETLY |
+ PERFORM_DELETION_SKIP_ORIGINAL |
+ PERFORM_DELETION_SKIP_EXTENSIONS);
}
/*
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 6f4b96b0f3..264298e8a9 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2218,7 +2218,10 @@ do_autovacuum(void)
object.classId = RelationRelationId;
object.objectId = relid;
object.objectSubId = 0;
- performDeletion(&object, DROP_CASCADE, PERFORM_DELETION_INTERNAL);
+ performDeletion(&object, DROP_CASCADE,
+ PERFORM_DELETION_INTERNAL |
+ PERFORM_DELETION_QUIETLY |
+ PERFORM_DELETION_SKIP_EXTENSIONS);
/*
* To commit the deletion, end current transaction and start a new
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 09b36c5c78..4d84a6ba08 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -166,11 +166,15 @@ typedef enum ObjectClass
#define LAST_OCLASS OCLASS_TRANSFORM
+/* flag bits for performDeletion/performMultipleDeletions: */
+#define PERFORM_DELETION_INTERNAL 0x0001 /* internal action */
+#define PERFORM_DELETION_CONCURRENTLY 0x0002 /* concurrent drop */
+#define PERFORM_DELETION_QUIETLY 0x0004 /* suppress notices */
+#define PERFORM_DELETION_SKIP_ORIGINAL 0x0008 /* keep original obj */
+#define PERFORM_DELETION_SKIP_EXTENSIONS 0x0010 /* keep extensions */
-/* in dependency.c */
-#define PERFORM_DELETION_INTERNAL 0x0001
-#define PERFORM_DELETION_CONCURRENTLY 0x0002
+/* in dependency.c */
extern void performDeletion(const ObjectAddress *object,
DropBehavior behavior, int flags);
@@ -178,9 +182,6 @@ extern void performDeletion(const ObjectAddress *object,
extern void performMultipleDeletions(const ObjectAddresses *objects,
DropBehavior behavior, int flags);
-extern void deleteWhatDependsOn(const ObjectAddress *object,
- bool showNotices);
-
extern void recordDependencyOnExpr(const ObjectAddress *depender,
Node *expr, List *rtable,
DependencyType behavior);
diff --git a/src/include/commands/extension.h b/src/include/commands/extension.h
index e98b245ac1..c0e08ddf7d 100644
--- a/src/include/commands/extension.h
+++ b/src/include/commands/extension.h
@@ -19,10 +19,13 @@
/*
- * creating_extension is only true while running a CREATE EXTENSION command.
- * It instructs recordDependencyOnCurrentExtension() to register a dependency
- * on the current pg_extension object for each SQL object created by its
- * installation script.
+ * creating_extension is only true while running a CREATE EXTENSION or ALTER
+ * EXTENSION UPDATE command. It instructs recordDependencyOnCurrentExtension()
+ * to register a dependency on the current pg_extension object for each SQL
+ * object created by an extension script. It also instructs performDeletion()
+ * to remove such dependencies without following them, so that extension
+ * scripts can drop member objects without having to explicitly dissociate
+ * them from the extension first.
*/
extern PGDLLIMPORT bool creating_extension;
extern Oid CurrentExtensionObject;
diff --git a/src/test/modules/test_extensions/Makefile b/src/test/modules/test_extensions/Makefile
index b184570779..d18108e4e5 100644
--- a/src/test/modules/test_extensions/Makefile
+++ b/src/test/modules/test_extensions/Makefile
@@ -4,10 +4,10 @@ MODULE = test_extensions
PGFILEDESC = "test_extensions - regression testing for EXTENSION support"
EXTENSION = test_ext1 test_ext2 test_ext3 test_ext4 test_ext5 test_ext6 \
- test_ext7 test_ext_cyclic1 test_ext_cyclic2
+ test_ext7 test_ext8 test_ext_cyclic1 test_ext_cyclic2
DATA = test_ext1--1.0.sql test_ext2--1.0.sql test_ext3--1.0.sql \
test_ext4--1.0.sql test_ext5--1.0.sql test_ext6--1.0.sql \
- test_ext7--1.0.sql test_ext7--1.0--2.0.sql \
+ test_ext7--1.0.sql test_ext7--1.0--2.0.sql test_ext8--1.0.sql \
test_ext_cyclic1--1.0.sql test_ext_cyclic2--1.0.sql
REGRESS = test_extensions test_extdepend
diff --git a/src/test/modules/test_extensions/expected/test_extensions.out b/src/test/modules/test_extensions/expected/test_extensions.out
index ea096b9fb6..a24820e735 100644
--- a/src/test/modules/test_extensions/expected/test_extensions.out
+++ b/src/test/modules/test_extensions/expected/test_extensions.out
@@ -63,3 +63,61 @@ Objects in extension "test_ext7"
table ext7_table2
(2 rows)
+-- test handling of temp objects created by extensions
+create extension test_ext8;
+-- \dx+ would expose a variable pg_temp_nn schema name, so we can't use it here
+select regexp_replace(pg_describe_object(classid, objid, objsubid),
+ 'pg_temp_\d+', 'pg_temp', 'g') as "Object Description"
+from pg_depend
+where refclassid = 'pg_extension'::regclass and deptype = 'e' and
+ refobjid = (select oid from pg_extension where extname = 'test_ext8')
+order by 1;
+ Object Description
+-----------------------------------------
+ function ext8_even(posint)
+ function pg_temp.ext8_temp_even(posint)
+ table ext8_table1
+ table ext8_temp_table1
+ type posint
+(5 rows)
+
+-- Should be possible to drop and recreate this extension
+drop extension test_ext8;
+create extension test_ext8;
+select regexp_replace(pg_describe_object(classid, objid, objsubid),
+ 'pg_temp_\d+', 'pg_temp', 'g') as "Object Description"
+from pg_depend
+where refclassid = 'pg_extension'::regclass and deptype = 'e' and
+ refobjid = (select oid from pg_extension where extname = 'test_ext8')
+order by 1;
+ Object Description
+-----------------------------------------
+ function ext8_even(posint)
+ function pg_temp.ext8_temp_even(posint)
+ table ext8_table1
+ table ext8_temp_table1
+ type posint
+(5 rows)
+
+-- here we want to start a new session and wait till old one is gone
+select pg_backend_pid() as oldpid \gset
+\c -
+do 'declare c int = 0;
+begin
+ while (select count(*) from pg_stat_activity where pid = '
+ :'oldpid'
+ ') > 0 loop c := c + 1; end loop;
+ raise log ''test_extensions looped % times'', c;
+end';
+-- extension should now contain no temp objects
+\dx+ test_ext8
+Objects in extension "test_ext8"
+ Object Description
+----------------------------
+ function ext8_even(posint)
+ table ext8_table1
+ type posint
+(3 rows)
+
+-- dropping it should still work
+drop extension test_ext8;
diff --git a/src/test/modules/test_extensions/sql/test_extensions.sql b/src/test/modules/test_extensions/sql/test_extensions.sql
index b53be00c4e..5e884d187f 100644
--- a/src/test/modules/test_extensions/sql/test_extensions.sql
+++ b/src/test/modules/test_extensions/sql/test_extensions.sql
@@ -25,3 +25,42 @@ create extension test_ext7;
\dx+ test_ext7
alter extension test_ext7 update to '2.0';
\dx+ test_ext7
+
+-- test handling of temp objects created by extensions
+create extension test_ext8;
+
+-- \dx+ would expose a variable pg_temp_nn schema name, so we can't use it here
+select regexp_replace(pg_describe_object(classid, objid, objsubid),
+ 'pg_temp_\d+', 'pg_temp', 'g') as "Object Description"
+from pg_depend
+where refclassid = 'pg_extension'::regclass and deptype = 'e' and
+ refobjid = (select oid from pg_extension where extname = 'test_ext8')
+order by 1;
+
+-- Should be possible to drop and recreate this extension
+drop extension test_ext8;
+create extension test_ext8;
+
+select regexp_replace(pg_describe_object(classid, objid, objsubid),
+ 'pg_temp_\d+', 'pg_temp', 'g') as "Object Description"
+from pg_depend
+where refclassid = 'pg_extension'::regclass and deptype = 'e' and
+ refobjid = (select oid from pg_extension where extname = 'test_ext8')
+order by 1;
+
+-- here we want to start a new session and wait till old one is gone
+select pg_backend_pid() as oldpid \gset
+\c -
+do 'declare c int = 0;
+begin
+ while (select count(*) from pg_stat_activity where pid = '
+ :'oldpid'
+ ') > 0 loop c := c + 1; end loop;
+ raise log ''test_extensions looped % times'', c;
+end';
+
+-- extension should now contain no temp objects
+\dx+ test_ext8
+
+-- dropping it should still work
+drop extension test_ext8;
diff --git a/src/test/modules/test_extensions/test_ext8--1.0.sql b/src/test/modules/test_extensions/test_ext8--1.0.sql
new file mode 100644
index 0000000000..1561ffefaa
--- /dev/null
+++ b/src/test/modules/test_extensions/test_ext8--1.0.sql
@@ -0,0 +1,21 @@
+/* src/test/modules/test_extensions/test_ext8--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_ext8" to load this file. \quit
+
+-- create some random data type
+create domain posint as int check (value > 0);
+
+-- use it in regular and temporary tables and functions
+
+create table ext8_table1 (f1 posint);
+
+create temp table ext8_temp_table1 (f1 posint);
+
+create function ext8_even (posint) returns bool as
+ 'select ($1 % 2) = 0' language sql;
+
+create function pg_temp.ext8_temp_even (posint) returns bool as
+ 'select ($1 % 2) = 0' language sql;
+
+-- we intentionally don't drop the temp objects before exiting
diff --git a/src/test/modules/test_extensions/test_ext8.control b/src/test/modules/test_extensions/test_ext8.control
new file mode 100644
index 0000000000..70f8caaf30
--- /dev/null
+++ b/src/test/modules/test_extensions/test_ext8.control
@@ -0,0 +1,4 @@
+comment = 'Test extension 8'
+default_version = '1.0'
+schema = 'public'
+relocatable = false
--
cgit v1.2.3
From fe0a0b5993dfe24e4b3bcf52fa64ff41a444b8f1 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 5 Dec 2016 13:42:59 +0200
Subject: Replace PostmasterRandom() with a stronger source, second attempt.
This adds a new routine, pg_strong_random(), for generating random bytes,
for use in both frontend and backend. At the moment, it's only used in
the backend, but the upcoming SCRAM authentication patches need strong
random numbers in libpq as well.
pg_strong_random() is based on, and replaces, the existing implementation
in pgcrypto. It can acquire strong random numbers from a number of sources,
depending on what's available:
- OpenSSL RAND_bytes(), if built with OpenSSL
- On Windows, the native cryptographic functions are used
- /dev/urandom
Unlike the current pgcrypto function, the source is chosen by configure.
That makes it easier to test different implementations, and ensures that
we don't accidentally fall back to a less secure implementation if the
primary source fails. All of those methods are quite reliable; it would be
pretty surprising for them to fail, so we'd rather find out by failing
hard.
If no strong random source is available, we fall back to using erand48(),
seeded from the current timestamp, like PostmasterRandom() was. That isn't
cryptographically secure, but allows us to still work on platforms that
don't have any of the above stronger sources. Because it's not very secure,
the built-in implementation is only used if explicitly requested with
--disable-strong-random.
This replaces the more complicated Fortuna algorithm we used to have in
pgcrypto, which is unfortunate, but all modern platforms have /dev/urandom,
so it doesn't seem worth the maintenance effort to keep that. pgcrypto
functions that require strong random numbers will be disabled with
--disable-strong-random.
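As a usage sketch (the caller below is hypothetical; the bool-returning
signature matches the pg_strong_random() added in src/port/pg_strong_random.c
below), backend code asks for N random bytes and treats failure as a hard
error instead of silently degrading:

    #include "postgres.h"
    #include "port.h"                   /* declares pg_strong_random() */

    static void
    generate_salt(char *salt, size_t len)
    {
        /* fill the buffer from the configure-selected strong source */
        if (!pg_strong_random(salt, len))
            ereport(ERROR,
                    (errcode(ERRCODE_INTERNAL_ERROR),
                     errmsg("could not generate random salt")));
    }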
Original patch by Magnus Hagander, tons of further work by Michael Paquier
and me.
Discussion: https://www.postgresql.org/message-id/CAB7nPqRy3krN8quR9XujMVVHYtXJ0_60nqgVc6oUk8ygyVkZsA@mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAB7nPqRWkNYRRPJA7-cF+LfroYV10pvjdz6GNvxk-Eee9FypKA@mail.gmail.com
---
configure | 109 +++++
configure.in | 52 +++
contrib/pgcrypto/Makefile | 2 +-
contrib/pgcrypto/expected/pgp-compression_1.out | 42 ++
contrib/pgcrypto/expected/pgp-decrypt_1.out | 424 +++++++++++++++++++
contrib/pgcrypto/expected/pgp-encrypt_1.out | 161 +++++++
contrib/pgcrypto/expected/pgp-pubkey-encrypt_1.out | 62 +++
contrib/pgcrypto/fortuna.c | 463 ---------------------
contrib/pgcrypto/fortuna.h | 38 --
contrib/pgcrypto/internal.c | 63 ---
contrib/pgcrypto/openssl.c | 46 --
contrib/pgcrypto/pgcrypto.c | 27 +-
contrib/pgcrypto/pgp-encrypt.c | 22 +-
contrib/pgcrypto/pgp-mpi-internal.c | 10 +-
contrib/pgcrypto/pgp-pgsql.c | 95 +----
contrib/pgcrypto/pgp-pubenc.c | 26 +-
contrib/pgcrypto/pgp-s2k.c | 15 +-
contrib/pgcrypto/px-crypt.c | 7 +-
contrib/pgcrypto/px.c | 34 +-
contrib/pgcrypto/px.h | 7 +-
contrib/pgcrypto/random.c | 247 -----------
doc/src/sgml/installation.sgml | 17 +
src/Makefile.global.in | 1 +
src/backend/libpq/auth.c | 62 ++-
src/backend/libpq/crypt.c | 10 +-
src/backend/postmaster/postmaster.c | 146 ++++---
src/backend/storage/ipc/ipci.c | 3 +
src/backend/storage/lmgr/lwlocknames.txt | 1 +
src/backend/utils/init/globals.c | 2 +-
src/backend/utils/misc/Makefile | 5 +-
src/backend/utils/misc/backend_random.c | 158 +++++++
src/include/libpq/crypt.h | 2 +-
src/include/libpq/libpq-be.h | 1 -
src/include/miscadmin.h | 2 +-
src/include/pg_config.h.in | 12 +
src/include/pg_config.h.win32 | 12 +
src/include/port.h | 6 +
src/include/utils/backend_random.h | 19 +
src/port/Makefile | 4 +
src/port/erand48.c | 7 +
src/port/pg_strong_random.c | 149 +++++++
src/tools/msvc/Mkvcbuild.pm | 5 +-
42 files changed, 1472 insertions(+), 1104 deletions(-)
create mode 100644 contrib/pgcrypto/expected/pgp-compression_1.out
create mode 100644 contrib/pgcrypto/expected/pgp-decrypt_1.out
create mode 100644 contrib/pgcrypto/expected/pgp-encrypt_1.out
create mode 100644 contrib/pgcrypto/expected/pgp-pubkey-encrypt_1.out
delete mode 100644 contrib/pgcrypto/fortuna.c
delete mode 100644 contrib/pgcrypto/fortuna.h
delete mode 100644 contrib/pgcrypto/random.c
create mode 100644 src/backend/utils/misc/backend_random.c
create mode 100644 src/include/utils/backend_random.h
create mode 100644 src/port/pg_strong_random.c
(limited to 'doc/src')
diff --git a/configure b/configure
index f4f2f8b7ce..551fc720b7 100755
--- a/configure
+++ b/configure
@@ -739,6 +739,7 @@ GENHTML
LCOV
GCOV
enable_debug
+enable_strong_random
enable_rpath
default_port
WANTED_LANGUAGES
@@ -806,6 +807,7 @@ with_pgport
enable_rpath
enable_spinlocks
enable_atomics
+enable_strong_random
enable_debug
enable_profiling
enable_coverage
@@ -1478,6 +1480,7 @@ Optional Features:
executables
--disable-spinlocks do not use spinlocks
--disable-atomics do not use atomic operations
+ --disable-strong-random do not use a strong random number source
--enable-debug build with debugging symbols (-g)
--enable-profiling build with profiling enabled
--enable-coverage build with coverage testing instrumentation
@@ -3192,6 +3195,34 @@ fi
+#
+# Random number generation
+#
+
+
+# Check whether --enable-strong-random was given.
+if test "${enable_strong_random+set}" = set; then :
+ enableval=$enable_strong_random;
+ case $enableval in
+ yes)
+ :
+ ;;
+ no)
+ :
+ ;;
+ *)
+ as_fn_error $? "no argument expected for --enable-strong-random option" "$LINENO" 5
+ ;;
+ esac
+
+else
+ enable_strong_random=yes
+
+fi
+
+
+
+
#
# --enable-debug adds -g to compiler flags
#
@@ -14982,6 +15013,84 @@ $as_echo "#define USE_WIN32_SHARED_MEMORY 1" >>confdefs.h
SHMEM_IMPLEMENTATION="src/backend/port/win32_shmem.c"
fi
+# Select random number source
+#
+# You can override this logic by setting the appropriate USE_*RANDOM flag to 1
+# in the template or on the configure command line.
+
+# If not selected manually, try to select a source automatically.
+if test "$enable_strong_random" = "yes" && test x"$USE_OPENSSL_RANDOM" = x"" && test x"$USE_WIN32_RANDOM" = x"" && test x"$USE_DEV_URANDOM" = x"" ; then
+ if test x"$with_openssl" = x"yes" ; then
+ USE_OPENSSL_RANDOM=1
+ elif test "$PORTNAME" = x"win32" ; then
+ USE_WIN32_RANDOM=1
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for /dev/urandom" >&5
+$as_echo_n "checking for /dev/urandom... " >&6; }
+if ${ac_cv_file__dev_urandom+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ test "$cross_compiling" = yes &&
+ as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5
+if test -r "/dev/urandom"; then
+ ac_cv_file__dev_urandom=yes
+else
+ ac_cv_file__dev_urandom=no
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_file__dev_urandom" >&5
+$as_echo "$ac_cv_file__dev_urandom" >&6; }
+if test "x$ac_cv_file__dev_urandom" = xyes; then :
+
+fi
+
+
+ if test x"$ac_cv_file__dev_urandom" = x"yes" ; then
+ USE_DEV_URANDOM=1
+ fi
+ fi
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which random number source to use" >&5
+$as_echo_n "checking which random number source to use... " >&6; }
+if test "$enable_strong_random" = yes ; then
+ if test x"$USE_OPENSSL_RANDOM" = x"1" ; then
+
+$as_echo "#define USE_OPENSSL_RANDOM 1" >>confdefs.h
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: OpenSSL" >&5
+$as_echo "OpenSSL" >&6; }
+ elif test x"$USE_WIN32_RANDOM" = x"1" ; then
+
+$as_echo "#define USE_WIN32_RANDOM 1" >>confdefs.h
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: Windows native" >&5
+$as_echo "Windows native" >&6; }
+ elif test x"$USE_DEV_URANDOM" = x"1" ; then
+
+$as_echo "#define USE_DEV_URANDOM 1" >>confdefs.h
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: /dev/urandom" >&5
+$as_echo "/dev/urandom" >&6; }
+ else
+ as_fn_error $? "
+no source of strong random numbers was found
+PostgreSQL can use OpenSSL or /dev/urandom as a source of random numbers
+for authentication protocols. You can use --disable-strong-random to use a built-in
+pseudo-random number generator, but that may be insecure." "$LINENO" 5
+ fi
+
+$as_echo "#define HAVE_STRONG_RANDOM 1" >>confdefs.h
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: weak builtin PRNG" >&5
+$as_echo "weak builtin PRNG" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING:
+*** Not using a strong random number source may be insecure." >&5
+$as_echo "$as_me: WARNING:
+*** Not using a strong random number source may be insecure." >&2;}
+fi
+
# If not set in template file, set bytes to use libc memset()
if test x"$MEMSET_LOOP_LIMIT" = x"" ; then
MEMSET_LOOP_LIMIT=1024
diff --git a/configure.in b/configure.in
index 9f7611caeb..595e047d0e 100644
--- a/configure.in
+++ b/configure.in
@@ -193,6 +193,13 @@ PGAC_ARG_BOOL(enable, spinlocks, yes,
PGAC_ARG_BOOL(enable, atomics, yes,
[do not use atomic operations])
+#
+# Random number generation
+#
+PGAC_ARG_BOOL(enable, strong-random, yes,
+ [do not use a strong random number source])
+AC_SUBST(enable_strong_random)
+
#
# --enable-debug adds -g to compiler flags
#
@@ -1965,6 +1972,51 @@ else
SHMEM_IMPLEMENTATION="src/backend/port/win32_shmem.c"
fi
+# Select random number source
+#
+# You can override this logic by setting the appropriate USE_*RANDOM flag to 1
+# in the template or on the configure command line.
+
+# If not selected manually, try to select a source automatically.
+if test "$enable_strong_random" = "yes" && test x"$USE_OPENSSL_RANDOM" = x"" && test x"$USE_WIN32_RANDOM" = x"" && test x"$USE_DEV_URANDOM" = x"" ; then
+ if test x"$with_openssl" = x"yes" ; then
+ USE_OPENSSL_RANDOM=1
+ elif test "$PORTNAME" = x"win32" ; then
+ USE_WIN32_RANDOM=1
+ else
+ AC_CHECK_FILE([/dev/urandom], [], [])
+
+ if test x"$ac_cv_file__dev_urandom" = x"yes" ; then
+ USE_DEV_URANDOM=1
+ fi
+ fi
+fi
+
+AC_MSG_CHECKING([which random number source to use])
+if test "$enable_strong_random" = yes ; then
+ if test x"$USE_OPENSSL_RANDOM" = x"1" ; then
+ AC_DEFINE(USE_OPENSSL_RANDOM, 1, [Define to use OpenSSL for random number generation])
+ AC_MSG_RESULT([OpenSSL])
+ elif test x"$USE_WIN32_RANDOM" = x"1" ; then
+ AC_DEFINE(USE_WIN32_RANDOM, 1, [Define to use native Windows API for random number generation])
+ AC_MSG_RESULT([Windows native])
+ elif test x"$USE_DEV_URANDOM" = x"1" ; then
+ AC_DEFINE(USE_DEV_URANDOM, 1, [Define to use /dev/urandom for random number generation])
+ AC_MSG_RESULT([/dev/urandom])
+ else
+ AC_MSG_ERROR([
+no source of strong random numbers was found
+PostgreSQL can use OpenSSL or /dev/urandom as a source of random numbers
+for authentication protocols. You can use --disable-strong-random to use a built-in
+pseudo-random number generator, but that may be insecure.])
+ fi
+ AC_DEFINE(HAVE_STRONG_RANDOM, 1, [Define if you have a strong random number source])
+else
+ AC_MSG_RESULT([weak builtin PRNG])
+ AC_MSG_WARN([
+*** Not using a strong random number source may be insecure.])
+fi
+
# If not set in template file, set bytes to use libc memset()
if test x"$MEMSET_LOOP_LIMIT" = x"" ; then
MEMSET_LOOP_LIMIT=1024
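To make the selection logic concrete, here is a simplified sketch (not the
actual file contents) of how the USE_*RANDOM macros defined above are meant
to steer the implementation in src/port/pg_strong_random.c; the real file
also carries the Windows-native branch and the erand48() fallback:

    #include <stdbool.h>
    #include <stddef.h>

    #if defined(USE_OPENSSL_RANDOM)
    #include <openssl/rand.h>
    #elif defined(USE_DEV_URANDOM)
    #include <fcntl.h>
    #include <unistd.h>
    #endif

    bool
    pg_strong_random(void *buf, size_t len)
    {
    #if defined(USE_OPENSSL_RANDOM)
        /* OpenSSL's CSPRNG; RAND_bytes() returns 1 on success */
        return RAND_bytes((unsigned char *) buf, (int) len) == 1;
    #elif defined(USE_DEV_URANDOM)
        /* read exactly len bytes from the kernel's entropy pool */
        int         fd = open("/dev/urandom", O_RDONLY, 0);
        char       *p = buf;

        if (fd < 0)
            return false;
        while (len > 0)
        {
            ssize_t     nread = read(fd, p, len);

            if (nread <= 0)
            {
                close(fd);
                return false;
            }
            p += nread;
            len -= nread;
        }
        close(fd);
        return true;
    #else
        return false;               /* no strong source configured */
    #endif
    }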
diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile
index 805db7626b..f65d84d1f3 100644
--- a/contrib/pgcrypto/Makefile
+++ b/contrib/pgcrypto/Makefile
@@ -1,7 +1,7 @@
# contrib/pgcrypto/Makefile
INT_SRCS = md5.c sha1.c sha2.c internal.c internal-sha2.c blf.c rijndael.c \
- fortuna.c random.c pgp-mpi-internal.c imath.c
+ pgp-mpi-internal.c imath.c
INT_TESTS = sha2
OSSL_SRCS = openssl.c pgp-mpi-openssl.c
diff --git a/contrib/pgcrypto/expected/pgp-compression_1.out b/contrib/pgcrypto/expected/pgp-compression_1.out
new file mode 100644
index 0000000000..25d5c35bf7
--- /dev/null
+++ b/contrib/pgcrypto/expected/pgp-compression_1.out
@@ -0,0 +1,42 @@
+--
+-- PGP compression support
+--
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+ww0ECQMCsci6AdHnELlh0kQB4jFcVwHMJg0Bulop7m3Mi36s15TAhBo0AnzIrRFrdLVCkKohsS6+
+DMcmR53SXfLoDJOv/M8uKj3QSq7oWNIp95pxfA==
+=tbSn
+-----END PGP MESSAGE-----
+'), 'key', 'expect-compress-algo=1');
+ pgp_sym_decrypt
+-----------------
+ Secret message
+(1 row)
+
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret message', 'key', 'compress-algo=0'),
+ 'key', 'expect-compress-algo=0');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret message', 'key', 'compress-algo=1'),
+ 'key', 'expect-compress-algo=1');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret message', 'key', 'compress-algo=2'),
+ 'key', 'expect-compress-algo=2');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- level=0 should turn compression off
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret message', 'key',
+ 'compress-algo=2, compress-level=0'),
+ 'key', 'expect-compress-algo=0');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
diff --git a/contrib/pgcrypto/expected/pgp-decrypt_1.out b/contrib/pgcrypto/expected/pgp-decrypt_1.out
new file mode 100644
index 0000000000..d9e1e386a2
--- /dev/null
+++ b/contrib/pgcrypto/expected/pgp-decrypt_1.out
@@ -0,0 +1,424 @@
+--
+-- pgp_decrypt tests
+--
+-- Checking ciphers
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.blowfish.sha1.mdc.s2k3.z0
+
+jA0EBAMCfFNwxnvodX9g0jwB4n4s26/g5VmKzVab1bX1SmwY7gvgvlWdF3jKisvS
+yA6Ce1QTMK3KdL2MPfamsTUSAML8huCJMwYQFfE=
+=JcP+
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCci97v0Q6Z0Zg0kQBsVf5Oe3iC+FBzUmuMV9KxmAyOMyjCc/5i8f1Eest
+UTAsG35A1vYs02VARKzGz6xI2UHwFUirP+brPBg3Ee7muOx8pA==
+=XtrP
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes192.sha1.mdc.s2k3.z0
+
+jA0ECAMCI7YQpWqp3D1g0kQBCjB7GlX7+SQeXNleXeXQ78ZAPNliquGDq9u378zI
+5FPTqAhIB2/2fjY8QEIs1ai00qphjX2NitxV/3Wn+6dufB4Q4g==
+=rCZt
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes256.sha1.mdc.s2k3.z0
+
+jA0ECQMC4f/5djqCC1Rg0kQBTHEPsD+Sw7biBsM2er3vKyGPAQkuTBGKC5ie7hT/
+lceMfQdbAg6oTFyJpk/wH18GzRDphCofg0X8uLgkAKMrpcmgog==
+=fB6S
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+-- Checking MDC modes
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.nomdc.s2k3.z0
+
+jA0EBwMCnv07rlXqWctgyS2Dm2JfOKCRL4sLSLJUC8RS2cH7cIhKSuLitOtyquB+
+u9YkgfJfsuRJmgQ9tmo=
+=60ui
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCEeP3idNjQ1Bg0kQBf4G0wX+2QNzLh2YNwYkQgQkfYhn/hLXjV4nK9nsE
+8Ex1Dsdt5UPvOz8W8VKQRS6loOfOe+yyXil8W3IYFwUpdDUi+Q==
+=moGf
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+-- Checking hashes
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.md5.mdc.s2k3.z0
+
+jA0EBwMClrXXtOXetohg0kQBn0Kl1ymevQZRHkdoYRHgzCwSQEiss7zYff2UNzgO
+KyRrHf7zEBuZiZ2AG34jNVMOLToj1jJUg5zTSdecUzQVCykWTA==
+=NyLk
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCApbdlrURoWJg0kQBzHM/E0o7djY82bNuspjxjAcPFrrtp0uvDdMQ4z2m
+/PM8jhgI5vxFYfNQjLl8y3fHYIomk9YflN9K/Q13iq8A8sjeTw==
+=FxbQ
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+-- Checking S2K modes
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k0.z0
+
+jAQEBwAC0kQBKTaLAKE3xzps+QIZowqRNb2eAdzBw2LxEW2YD5PgNlbhJdGg+dvw
+Ah9GXjGS1TVALzTImJbz1uHUZRfhJlFbc5yGQw==
+=YvkV
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k1.z0
+
+jAwEBwEC/QTByBLI3b/SRAHPxKzI6SZBo5lAEOD+EsvKQWO4adL9tDY+++Iqy1xK
+4IaWXVKEj9R2Lr2xntWWMGZtcKtjD2lFFRXXd9dZp1ZThNDz
+=dbXm
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCEq4Su3ZqNEJg0kQB4QG5jBTKF0i04xtH+avzmLhstBNRxvV3nsmB3cwl
+z+9ZaA/XdSx5ZiFnMym8P6r8uY9rLjjNptvvRHlxIReF+p9MNg==
+=VJKg
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes192.sha1.mdc.s2k0.z0
+
+jAQECAAC0kQBBDnQWkgsx9YFaqDfWmpsiyAJ6y2xG/sBvap1dySYEMuZ+wJTXQ9E
+Cr3i2M7TgVZ0M4jp4QL0adG1lpN5iK7aQeOwMw==
+=cg+i
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes192.sha1.mdc.s2k1.z0
+
+jAwECAECruOfyNDFiTnSRAEVoGXm4A9UZKkWljdzjEO/iaE7mIraltIpQMkiqCh9
+7h8uZ2u9uRBOv222fZodGvc6bvq/4R4hAa/6qSHtm8mdmvGt
+=aHmC
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes192.sha1.mdc.s2k3.z0
+
+jA0ECAMCjFn6SRi3SONg0kQBqtSHPaD0m7rXfDAhCWU/ypAsI93GuHGRyM99cvMv
+q6eF6859ZVnli3BFSDSk3a4e/pXhglxmDYCfjAXkozKNYLo6yw==
+=K0LS
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes256.sha1.mdc.s2k0.z0
+
+jAQECQAC0kQB4L1eMbani07XF2ZYiXNK9LW3v8w41oUPl7dStmrJPQFwsdxmrDHu
+rQr3WbdKdY9ufjOE5+mXI+EFkSPrF9rL9NCq6w==
+=RGts
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes256.sha1.mdc.s2k1.z0
+
+jAwECQECKHhrou7ZOIXSRAHWIVP+xjVQcjAVBTt+qh9SNzYe248xFTwozkwev3mO
++KVJW0qhk0An+Y2KF99/bYFl9cL5D3Tl43fC8fXGl3x3m7pR
+=SUrU
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes256.sha1.mdc.s2k3.z0
+
+jA0ECQMCjc8lwZu8Fz1g0kQBkEzjImi21liep5jj+3dAJ2aZFfUkohi8b3n9z+7+
+4+NRzL7cMW2RLAFnJbiqXDlRHMwleeuLN1up2WIxsxtYYuaBjA==
+=XZrG
+-----END PGP MESSAGE-----
+'), 'foobar');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+-- Checking longer passwords
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCx6dBiuqrYNRg0kQBEo63AvA1SCslxP7ayanLf1H0/hlk2nONVhTwVEWi
+tTGup1mMz6Cfh1uDRErUuXpx9A0gdMu7zX0o5XjrL7WGDAZdSw==
+=XKKG
+-----END PGP MESSAGE-----
+'), '0123456789abcdefghij');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCBDvYuS990iFg0kQBW31UK5OiCjWf5x6KJ8qNNT2HZWQCjCBZMU0XsOC6
+CMxFKadf144H/vpoV9GA0f22keQgCl0EsTE4V4lweVOPTKCMJg==
+=gWDh
+-----END PGP MESSAGE-----
+'), '0123456789abcdefghij2jk4h5g2j54khg23h54g2kh54g2khj54g23hj54');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCqXbFafC+ofVg0kQBejyiPqH0QMERVGfmPOjtAxvyG5KDIJPYojTgVSDt
+FwsDabdQUz5O7bgNSnxfmyw1OifGF+W2bIn/8W+0rDf8u3+O+Q==
+=OxOF
+-----END PGP MESSAGE-----
+'), 'x');
+ pgp_sym_decrypt
+-----------------
+ Secret message.
+(1 row)
+
+-- Checking various data
+select encode(digest(pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat1.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCGJ+SpuOysINg0kQBJfSjzsW0x4OVcAyr17O7FBvMTwIGeGcJd99oTQU8
+Xtx3kDqnhUq9Z1fS3qPbi5iNP2A9NxOBxPWz2JzxhydANlgbxg==
+=W/ik
+-----END PGP MESSAGE-----
+'), '0123456789abcdefghij'), 'sha1'), 'hex');
+ encode
+------------------------------------------
+ 0225e3ede6f2587b076d021a189ff60aad67e066
+(1 row)
+
+-- expected: 0225e3ede6f2587b076d021a189ff60aad67e066
+select encode(digest(pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat2.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCvdpDvidNzMxg0jUBvj8eS2+1t/9/zgemxvhtc0fvdKGGbjH7dleaTJRB
+SaV9L04ky1qECNDx3XjnoKLC+H7IOQ==
+=Fxen
+-----END PGP MESSAGE-----
+'), '0123456789abcdefghij'), 'sha1'), 'hex');
+ encode
+------------------------------------------
+ da39a3ee5e6b4b0d3255bfef95601890afd80709
+(1 row)
+
+-- expected: da39a3ee5e6b4b0d3255bfef95601890afd80709
+select encode(digest(pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: dat3.aes.sha1.mdc.s2k3.z0
+
+jA0EBwMCxQvxJZ3G/HRg0lgBeYmTa7/uDAjPyFwSX4CYBgpZWVn/JS8JzILrcWF8
+gFnkUKIE0PSaYFp+Yi1VlRfUtRQ/X/LYNGa7tWZS+4VQajz2Xtz4vUeAEiYFYPXk
+73Hb8m1yRhQK
+=ivrD
+-----END PGP MESSAGE-----
+'), '0123456789abcdefghij'), 'sha1'), 'hex');
+ encode
+------------------------------------------
+ 5e5c135efc0dd00633efc6dfd6e731ea408a5b4c
+(1 row)
+
+-- expected: 5e5c135efc0dd00633efc6dfd6e731ea408a5b4c
+-- Checking CRLF
+select encode(digest(pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: crlf mess
+
+ww0ECQMCt7VAtby6l4Bi0lgB5KMIZiiF/b3CfMfUyY0eDncsGXtkbu1X+l9brjpMP8eJnY79Amms
+a3nsOzKTXUfS9VyaXo8IrncM6n7fdaXpwba/3tNsAhJG4lDv1k4g9v8Ix2dfv6Rs
+=mBP9
+-----END PGP MESSAGE-----
+'), 'key', 'convert-crlf=0'), 'sha1'), 'hex');
+ encode
+------------------------------------------
+ 9353062be7720f1446d30b9e75573a4833886784
+(1 row)
+
+-- expected: 9353062be7720f1446d30b9e75573a4833886784
+select encode(digest(pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+Comment: crlf mess
+
+ww0ECQMCt7VAtby6l4Bi0lgB5KMIZiiF/b3CfMfUyY0eDncsGXtkbu1X+l9brjpMP8eJnY79Amms
+a3nsOzKTXUfS9VyaXo8IrncM6n7fdaXpwba/3tNsAhJG4lDv1k4g9v8Ix2dfv6Rs
+=mBP9
+-----END PGP MESSAGE-----
+'), 'key', 'convert-crlf=1'), 'sha1'), 'hex');
+ encode
+------------------------------------------
+ 7efefcab38467f7484d6fa43dc86cf5281bd78e2
+(1 row)
+
+-- expected: 7efefcab38467f7484d6fa43dc86cf5281bd78e2
+-- check BUG #11905, problem with messages 6 less than a power of 2.
+select pgp_sym_decrypt(pgp_sym_encrypt(repeat('x',65530),'1'),'1') = repeat('x',65530);
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- expected: true
+-- Negative tests
+-- Decryption with a certain incorrect key yields an apparent Literal Data
+-- packet reporting its content to be binary data. Ciphertext source:
+-- iterative pgp_sym_encrypt('secret', 'key') until the random prefix gave
+-- rise to that property.
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+ww0EBwMCxf8PTrQBmJdl0jcB6y2joE7GSLKRv7trbNsF5Z8ou5NISLUg31llVH/S0B2wl4bvzZjV
+VsxxqLSPzNLAeIspJk5G
+=mSd/
+-----END PGP MESSAGE-----
+'), 'wrong-key', 'debug=1');
+NOTICE: dbg: prefix_init: corrupt prefix
+NOTICE: dbg: parse_literal_data: data type=b
+NOTICE: dbg: mdcbuf_finish: bad MDC pkt hdr
+ERROR: Wrong key or corrupt data
+-- Routine text/binary mismatch.
+select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- Decryption with a certain incorrect key yields an apparent BZip2-compressed
+-- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key')
+-- until the random prefix gave rise to that property.
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+ww0EBwMC9rK/dMkF5Zlt0jcBlzAQ1mQY2qYbKYbw8h3EZ5Jk0K2IiY92R82TRhWzBIF/8cmXDPtP
+GXsd65oYJZp3Khz0qfyn
+=Nmpq
+-----END PGP MESSAGE-----
+'), 'wrong-key', 'debug=1');
+NOTICE: dbg: prefix_init: corrupt prefix
+NOTICE: dbg: parse_compressed_data: bzip2 unsupported
+NOTICE: dbg: mdcbuf_finish: bad MDC pkt hdr
+ERROR: Wrong key or corrupt data
+-- Routine use of BZip2 compression. Ciphertext source:
+-- echo x | gpg --homedir /nonexistent --personal-compress-preferences bzip2 \
+-- --personal-cipher-preferences aes --no-emit-version --batch \
+-- --symmetric --passphrase key --armor
+select pgp_sym_decrypt(dearmor('
+-----BEGIN PGP MESSAGE-----
+
+jA0EBwMCRhFrAKNcLVJg0mMBLJG1cCASNk/x/3dt1zJ+2eo7jHfjgg3N6wpB3XIe
+QCwkWJwlBG5pzbO5gu7xuPQN+TbPJ7aQ2sLx3bAHhtYb0i3vV9RO10Gw++yUyd4R
+UCAAw2JRIISttRHMfDpDuZJpvYo=
+=AZ9M
+-----END PGP MESSAGE-----
+'), 'key', 'debug=1');
+NOTICE: dbg: parse_compressed_data: bzip2 unsupported
+ERROR: Unsupported compression algorithm
diff --git a/contrib/pgcrypto/expected/pgp-encrypt_1.out b/contrib/pgcrypto/expected/pgp-encrypt_1.out
new file mode 100644
index 0000000000..2291e662ec
--- /dev/null
+++ b/contrib/pgcrypto/expected/pgp-encrypt_1.out
@@ -0,0 +1,161 @@
+--
+-- PGP encrypt
+--
+-- ensure consistent test output regardless of the default bytea format
+SET bytea_output TO escape;
+select pgp_sym_decrypt(pgp_sym_encrypt('Secret.', 'key'), 'key');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- check whether the defaults are ok
+select pgp_sym_decrypt(pgp_sym_encrypt('Secret.', 'key'),
+ 'key', 'expect-cipher-algo=aes128,
+ expect-disable-mdc=0,
+ expect-sess-key=0,
+ expect-s2k-mode=3,
+ expect-s2k-digest-algo=sha1,
+ expect-compress-algo=0
+ ');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- maybe the expect- stuff simply does not work
+select pgp_sym_decrypt(pgp_sym_encrypt('Secret.', 'key'),
+ 'key', 'expect-cipher-algo=bf,
+ expect-disable-mdc=1,
+ expect-sess-key=1,
+ expect-s2k-mode=0,
+ expect-s2k-digest-algo=md5,
+ expect-compress-algo=1
+ ');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- bytea as text
+select pgp_sym_decrypt(pgp_sym_encrypt_bytea('Binary', 'baz'), 'baz');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- text as bytea
+select pgp_sym_decrypt_bytea(pgp_sym_encrypt('Text', 'baz'), 'baz');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- algorithm change
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'cipher-algo=bf'),
+ 'key', 'expect-cipher-algo=bf');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'cipher-algo=aes'),
+ 'key', 'expect-cipher-algo=aes128');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'cipher-algo=aes192'),
+ 'key', 'expect-cipher-algo=aes192');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- s2k change
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-mode=0'),
+ 'key', 'expect-s2k-mode=0');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-mode=1'),
+ 'key', 'expect-s2k-mode=1');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-mode=3'),
+ 'key', 'expect-s2k-mode=3');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- s2k count change
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-count=1024'),
+ 'key', 'expect-s2k-count=1024');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- s2k_count rounds up
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-count=65000000'),
+ 'key', 'expect-s2k-count=65000000');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- s2k digest change
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-digest-algo=md5'),
+ 'key', 'expect-s2k-digest-algo=md5');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 's2k-digest-algo=sha1'),
+ 'key', 'expect-s2k-digest-algo=sha1');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- sess key
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'sess-key=0'),
+ 'key', 'expect-sess-key=0');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'sess-key=1'),
+ 'key', 'expect-sess-key=1');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'sess-key=1, cipher-algo=bf'),
+ 'key', 'expect-sess-key=1, expect-cipher-algo=bf');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'sess-key=1, cipher-algo=aes192'),
+ 'key', 'expect-sess-key=1, expect-cipher-algo=aes192');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'sess-key=1, cipher-algo=aes256'),
+ 'key', 'expect-sess-key=1, expect-cipher-algo=aes256');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- no mdc
+select pgp_sym_decrypt(
+ pgp_sym_encrypt('Secret.', 'key', 'disable-mdc=1'),
+ 'key', 'expect-disable-mdc=1');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- crlf
+select encode(pgp_sym_decrypt_bytea(
+ pgp_sym_encrypt(E'1\n2\n3\r\n', 'key', 'convert-crlf=1'),
+ 'key'), 'hex');
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- conversion should be lossless
+select encode(digest(pgp_sym_decrypt(
+ pgp_sym_encrypt(E'\r\n0\n1\r\r\n\n2\r', 'key', 'convert-crlf=1'),
+ 'key', 'convert-crlf=1'), 'sha1'), 'hex') as result,
+ encode(digest(E'\r\n0\n1\r\r\n\n2\r', 'sha1'), 'hex') as expect;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
diff --git a/contrib/pgcrypto/expected/pgp-pubkey-encrypt_1.out b/contrib/pgcrypto/expected/pgp-pubkey-encrypt_1.out
new file mode 100644
index 0000000000..3b1822ed91
--- /dev/null
+++ b/contrib/pgcrypto/expected/pgp-pubkey-encrypt_1.out
@@ -0,0 +1,62 @@
+--
+-- PGP Public Key Encryption
+--
+-- ensure consistent test output regardless of the default bytea format
+SET bytea_output TO escape;
+-- successful encrypt/decrypt
+select pgp_pub_decrypt(
+ pgp_pub_encrypt('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=1;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_pub_decrypt(
+ pgp_pub_encrypt('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=2;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_pub_decrypt(
+ pgp_pub_encrypt('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=3;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+select pgp_pub_decrypt(
+ pgp_pub_encrypt('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=6;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- try with rsa-sign only
+select pgp_pub_decrypt(
+ pgp_pub_encrypt('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=4;
+ERROR: No encryption key found
+-- try with secret key
+select pgp_pub_decrypt(
+ pgp_pub_encrypt('Secret msg', dearmor(seckey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=1;
+ERROR: Refusing to encrypt with secret key
+-- does text-to-bytea works
+select pgp_pub_decrypt_bytea(
+ pgp_pub_encrypt('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=1;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
+-- and bytea-to-text?
+select pgp_pub_decrypt(
+ pgp_pub_encrypt_bytea('Secret msg', dearmor(pubkey)),
+ dearmor(seckey))
+from keytbl where keytbl.id=1;
+ERROR: pg_random_bytes() is not supported by this build
+DETAIL: This functionality requires a source of strong random numbers
+HINT: You need to rebuild PostgreSQL using --enable-strong-random
diff --git a/contrib/pgcrypto/fortuna.c b/contrib/pgcrypto/fortuna.c
deleted file mode 100644
index 5028203479..0000000000
--- a/contrib/pgcrypto/fortuna.c
+++ /dev/null
@@ -1,463 +0,0 @@
-/*
- * fortuna.c
- * Fortuna-like PRNG.
- *
- * Copyright (c) 2005 Marko Kreen
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * contrib/pgcrypto/fortuna.c
- */
-
-#include "postgres.h"
-
-#include <sys/time.h>
-#include <time.h>
-
-#include "px.h"
-#include "rijndael.h"
-#include "sha2.h"
-#include "fortuna.h"
-
-
-/*
- * Why Fortuna-like: There does not seem to be any definitive reference
- * on Fortuna in the net. Instead this implementation is based on
- * following references:
- *
- * http://en.wikipedia.org/wiki/Fortuna_(PRNG)
- * - Wikipedia article
- * http://jlcooke.ca/random/
- * - Jean-Luc Cooke Fortuna-based /dev/random driver for Linux.
- */
-
-/*
- * There is some confusion about whether and how to carry forward
- * the state of the pools. Seems like original Fortuna does not
- * do it, resetting hash after each request. I guess expecting
- * feeding to happen more often than requesting. This is absolutely
- * unsuitable for pgcrypto, as nothing asynchronous happens here.
- *
- * J.L. Cooke fixed this by feeding previous hash to new re-initialized
- * hash context.
- *
- * Fortuna predecessor Yarrow requires ability to query intermediate
- * 'final result' from hash, without affecting it.
- *
- * This implementation uses the Yarrow method - asking for intermediate
- * results, but continuing with the old state.
- */
-
-
-/*
- * Algorithm parameters
- */
-
-/*
- * How many pools.
- *
- * Original Fortuna uses 32 pools, which means the 32nd pool is
- * not used earlier than in the 13th year. This is a waste in
- * pgcrypto, as we have very low-frequency seeding. Here it is
- * preferable to have all entropy usable in reasonable time.
- *
- * With 23 pools, the 23rd pool is used after 9 days, which seems
- * more sane.
- *
- * In our case the minimal cycle time would be a bit longer
- * than the system-randomness feeding frequency.
- */
-#define NUM_POOLS 23
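As a rough sanity check of those figures (assuming pool k is first drained on reseed number 2^k, with the 0.1 s minimum reseed interval defined just below):

    2^23 reseeds * 0.1 s ~=     838,861 s ~=  9.7 days   (last of 23 pools)
    2^32 reseeds * 0.1 s ~= 429,496,730 s ~= 13.6 years  (last of 32 pools)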
-
-/* in microseconds */
-#define RESEED_INTERVAL 100000 /* 0.1 sec */
-
-/* for one big request, reseed after this many bytes */
-#define RESEED_BYTES (1024*1024)
-
-/*
- * Skip reseed if pool 0 has less than this many
- * bytes added since last reseed.
- */
-#define POOL0_FILL (256/8)
-
-/*
- * Algorithm constants
- */
-
-/* Both cipher key size and hash result size */
-#define BLOCK 32
-
-/* cipher block size */
-#define CIPH_BLOCK 16
-
-/* for internal wrappers */
-#define MD_CTX SHA256_CTX
-#define CIPH_CTX rijndael_ctx
-
-struct fortuna_state
-{
- uint8 counter[CIPH_BLOCK];
- uint8 result[CIPH_BLOCK];
- uint8 key[BLOCK];
- MD_CTX pool[NUM_POOLS];
- CIPH_CTX ciph;
- unsigned reseed_count;
- struct timeval last_reseed_time;
- unsigned pool0_bytes;
- unsigned rnd_pos;
- int tricks_done;
-};
-typedef struct fortuna_state FState;
-
-
-/*
- * Use our own wrappers here.
- * - Need to get intermediate result from digest, without affecting it.
- * - Need re-set key on a cipher context.
- * - Algorithms are guaranteed to exist.
- * - No memory allocations.
- */
-
-static void
-ciph_init(CIPH_CTX * ctx, const uint8 *key, int klen)
-{
- rijndael_set_key(ctx, (const uint32 *) key, klen, 1);
-}
-
-static void
-ciph_encrypt(CIPH_CTX * ctx, const uint8 *in, uint8 *out)
-{
- rijndael_encrypt(ctx, (const uint32 *) in, (uint32 *) out);
-}
-
-static void
-md_init(MD_CTX * ctx)
-{
- SHA256_Init(ctx);
-}
-
-static void
-md_update(MD_CTX * ctx, const uint8 *data, int len)
-{
- SHA256_Update(ctx, data, len);
-}
-
-static void
-md_result(MD_CTX * ctx, uint8 *dst)
-{
- SHA256_CTX tmp;
-
- memcpy(&tmp, ctx, sizeof(*ctx));
- SHA256_Final(dst, &tmp);
- px_memset(&tmp, 0, sizeof(tmp));
-}
-
-/*
- * initialize state
- */
-static void
-init_state(FState *st)
-{
- int i;
-
- memset(st, 0, sizeof(*st));
- for (i = 0; i < NUM_POOLS; i++)
- md_init(&st->pool[i]);
-}
-
-/*
- * Endianness does not matter.
- * It just needs to change without repeating.
- */
-static void
-inc_counter(FState *st)
-{
- uint32 *val = (uint32 *) st->counter;
-
- if (++val[0])
- return;
- if (++val[1])
- return;
- if (++val[2])
- return;
- ++val[3];
-}
-
-/*
- * This is called 'cipher in counter mode'.
- */
-static void
-encrypt_counter(FState *st, uint8 *dst)
-{
- ciph_encrypt(&st->ciph, st->counter, dst);
- inc_counter(st);
-}
-
-
-/*
- * The time between reseed must be at least RESEED_INTERVAL
- * microseconds.
- */
-static int
-enough_time_passed(FState *st)
-{
- int ok;
- struct timeval tv;
- struct timeval *last = &st->last_reseed_time;
-
- gettimeofday(&tv, NULL);
-
- /* check how much time has passed */
- ok = 0;
- if (tv.tv_sec > last->tv_sec + 1)
- ok = 1;
- else if (tv.tv_sec == last->tv_sec + 1)
- {
- if (1000000 + tv.tv_usec - last->tv_usec >= RESEED_INTERVAL)
- ok = 1;
- }
- else if (tv.tv_usec - last->tv_usec >= RESEED_INTERVAL)
- ok = 1;
-
- /* reseed will happen, update last_reseed_time */
- if (ok)
- memcpy(last, &tv, sizeof(tv));
-
- px_memset(&tv, 0, sizeof(tv));
-
- return ok;
-}
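A worked example of the microsecond carry handled above, with hypothetical timestamps: if last = {5 s, 950000 us} and tv = {6 s, 100000 us}, the middle branch computes 1000000 + 100000 - 950000 = 150000 us, which is >= RESEED_INTERVAL (100000 us), so the reseed proceeds; had only 30000 us elapsed, the test would fail and the reseed would be skipped.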
-
-/*
- * generate new key from all the pools
- */
-static void
-reseed(FState *st)
-{
- unsigned k;
- unsigned n;
- MD_CTX key_md;
- uint8 buf[BLOCK];
-
- /* set pool as empty */
- st->pool0_bytes = 0;
-
- /*
- * Both #0 and #1 reseed would use only pool 0. Just skip #0 then.
- */
- n = ++st->reseed_count;
-
- /*
- * The goal: use k-th pool only 1/(2^k) of the time.
- */
- md_init(&key_md);
- for (k = 0; k < NUM_POOLS; k++)
- {
- md_result(&st->pool[k], buf);
- md_update(&key_md, buf, BLOCK);
-
- if (n & 1 || !n)
- break;
- n >>= 1;
- }
-
- /* add old key into mix too */
- md_update(&key_md, st->key, BLOCK);
-
- /* now we have new key */
- md_result(&key_md, st->key);
-
- /* use new key */
- ciph_init(&st->ciph, st->key, BLOCK);
-
- px_memset(&key_md, 0, sizeof(key_md));
- px_memset(buf, 0, BLOCK);
-}
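To make the 1/(2^k) schedule concrete: the loop above folds in pools 0 through k, where k is the number of trailing zero bits in the reseed counter n. An illustrative trace of the first few reseeds:

    n = 1  (binary    1)  ->  pool 0
    n = 2  (binary   10)  ->  pools 0-1
    n = 3  (binary   11)  ->  pool 0
    n = 4  (binary  100)  ->  pools 0-2
    n = 8  (binary 1000)  ->  pools 0-3

so pool k contributes on every 2^k-th reseed, as intended.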
-
-/*
- * Pick a random pool. This uses key bytes as random source.
- */
-static unsigned
-get_rand_pool(FState *st)
-{
- unsigned rnd;
-
- /*
- * This slightly prefers lower pools - that is OK.
- */
- rnd = st->key[st->rnd_pos] % NUM_POOLS;
-
- st->rnd_pos++;
- if (st->rnd_pos >= BLOCK)
- st->rnd_pos = 0;
-
- return rnd;
-}
-
-/*
- * update pools
- */
-static void
-add_entropy(FState *st, const uint8 *data, unsigned len)
-{
- unsigned pos;
- uint8 hash[BLOCK];
- MD_CTX md;
-
- /* hash given data */
- md_init(&md);
- md_update(&md, data, len);
- md_result(&md, hash);
-
- /*
- * Make sure the pool 0 is initialized, then update randomly.
- */
- if (st->reseed_count == 0)
- pos = 0;
- else
- pos = get_rand_pool(st);
- md_update(&st->pool[pos], hash, BLOCK);
-
- if (pos == 0)
- st->pool0_bytes += len;
-
- px_memset(hash, 0, BLOCK);
- px_memset(&md, 0, sizeof(md));
-}
-
-/*
- * Just take 2 next blocks as new key
- */
-static void
-rekey(FState *st)
-{
- encrypt_counter(st, st->key);
- encrypt_counter(st, st->key + CIPH_BLOCK);
- ciph_init(&st->ciph, st->key, BLOCK);
-}
-
-/*
- * Hide public constants. (counter, pools > 0)
- *
- * This can also be viewed as spreading the startup
- * entropy over all of the components.
- */
-static void
-startup_tricks(FState *st)
-{
- int i;
- uint8 buf[BLOCK];
-
- /* Use next block as counter. */
- encrypt_counter(st, st->counter);
-
- /* Now shuffle pools, excluding #0 */
- for (i = 1; i < NUM_POOLS; i++)
- {
- encrypt_counter(st, buf);
- encrypt_counter(st, buf + CIPH_BLOCK);
- md_update(&st->pool[i], buf, BLOCK);
- }
- px_memset(buf, 0, BLOCK);
-
- /* Hide the key. */
- rekey(st);
-
- /* This can be done only once. */
- st->tricks_done = 1;
-}
-
-static void
-extract_data(FState *st, unsigned count, uint8 *dst)
-{
- unsigned n;
- unsigned block_nr = 0;
-
- /* Should we reseed? */
- if (st->pool0_bytes >= POOL0_FILL || st->reseed_count == 0)
- if (enough_time_passed(st))
- reseed(st);
-
- /* Do some randomization on first call */
- if (!st->tricks_done)
- startup_tricks(st);
-
- while (count > 0)
- {
- /* produce bytes */
- encrypt_counter(st, st->result);
-
- /* copy result */
- if (count > CIPH_BLOCK)
- n = CIPH_BLOCK;
- else
- n = count;
- memcpy(dst, st->result, n);
- dst += n;
- count -= n;
-
- /* must not give out too many bytes with one key */
- block_nr++;
- if (block_nr > (RESEED_BYTES / CIPH_BLOCK))
- {
- rekey(st);
- block_nr = 0;
- }
- }
- /* Set new key for next request. */
- rekey(st);
-}
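For scale, the rekey budget above works out to RESEED_BYTES / CIPH_BLOCK = 1,048,576 / 16 = 65,536 counter-mode blocks, i.e. one key never encrypts more than 1 MB of output before being replaced.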
-
-/*
- * public interface
- */
-
-static FState main_state;
-static int init_done = 0;
-
-void
-fortuna_add_entropy(const uint8 *data, unsigned len)
-{
- if (!init_done)
- {
- init_state(&main_state);
- init_done = 1;
- }
- if (!data || !len)
- return;
- add_entropy(&main_state, data, len);
-}
-
-void
-fortuna_get_bytes(unsigned len, uint8 *dst)
-{
- if (!init_done)
- {
- init_state(&main_state);
- init_done = 1;
- }
- if (!dst || !len)
- return;
- extract_data(&main_state, len, dst);
-}
diff --git a/contrib/pgcrypto/fortuna.h b/contrib/pgcrypto/fortuna.h
deleted file mode 100644
index bf9f4768d1..0000000000
--- a/contrib/pgcrypto/fortuna.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * fortuna.c
- * Fortuna PRNG.
- *
- * Copyright (c) 2005 Marko Kreen
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * contrib/pgcrypto/fortuna.h
- */
-
-#ifndef __FORTUNA_H
-#define __FORTUNA_H
-
-void fortuna_get_bytes(unsigned len, uint8 *dst);
-void fortuna_add_entropy(const uint8 *data, unsigned len);
-
-#endif
diff --git a/contrib/pgcrypto/internal.c b/contrib/pgcrypto/internal.c
index 02ff976c25..2516092ba4 100644
--- a/contrib/pgcrypto/internal.c
+++ b/contrib/pgcrypto/internal.c
@@ -38,7 +38,6 @@
#include "sha1.h"
#include "blf.h"
#include "rijndael.h"
-#include "fortuna.h"
/*
* System reseeds should be separated at least this much.
@@ -615,65 +614,3 @@ px_find_cipher(const char *name, PX_Cipher **res)
*res = c;
return 0;
}
-
-/*
- * Randomness provider
- */
-
-static time_t seed_time = 0;
-static time_t check_time = 0;
-
-static void
-system_reseed(void)
-{
- uint8 buf[1024];
- int n;
- time_t t;
- int skip = 1;
-
- t = time(NULL);
-
- if (seed_time == 0)
- skip = 0;
- else if ((t - seed_time) < SYSTEM_RESEED_MIN)
- skip = 1;
- else if ((t - seed_time) > SYSTEM_RESEED_MAX)
- skip = 0;
- else if (check_time == 0 ||
- (t - check_time) > SYSTEM_RESEED_CHECK_TIME)
- {
- check_time = t;
-
- /* roll dice */
- px_get_random_bytes(buf, 1);
- skip = buf[0] >= SYSTEM_RESEED_CHANCE;
- }
- /* clear 1 byte */
- px_memset(buf, 0, sizeof(buf));
-
- if (skip)
- return;
-
- n = px_acquire_system_randomness(buf);
- if (n > 0)
- fortuna_add_entropy(buf, n);
-
- seed_time = t;
- px_memset(buf, 0, sizeof(buf));
-}
-
-int
-px_get_random_bytes(uint8 *dst, unsigned count)
-{
- system_reseed();
- fortuna_get_bytes(count, dst);
- return 0;
-}
-
-int
-px_add_entropy(const uint8 *data, unsigned count)
-{
- system_reseed();
- fortuna_add_entropy(data, count);
- return 0;
-}
diff --git a/contrib/pgcrypto/openssl.c b/contrib/pgcrypto/openssl.c
index f3e3a92486..1d3e58d925 100644
--- a/contrib/pgcrypto/openssl.c
+++ b/contrib/pgcrypto/openssl.c
@@ -712,49 +712,3 @@ px_find_cipher(const char *name, PX_Cipher **res)
*res = c;
return 0;
}
-
-
-static int openssl_random_init = 0;
-
-/*
- * OpenSSL random should be re-seeded occasionally, preferably
- * from /dev/urandom.
- */
-static void
-init_openssl_rand(void)
-{
- if (RAND_get_rand_method() == NULL)
- {
-#ifdef HAVE_RAND_OPENSSL
- RAND_set_rand_method(RAND_OpenSSL());
-#else
- RAND_set_rand_method(RAND_SSLeay());
-#endif
- }
- openssl_random_init = 1;
-}
-
-int
-px_get_random_bytes(uint8 *dst, unsigned count)
-{
- int res;
-
- if (!openssl_random_init)
- init_openssl_rand();
-
- res = RAND_bytes(dst, count);
- if (res == 1)
- return count;
-
- return PXE_OSSL_RAND_ERROR;
-}
-
-int
-px_add_entropy(const uint8 *data, unsigned count)
-{
- /*
- * estimate 0 bits
- */
- RAND_add(data, count, 0);
- return 0;
-}
diff --git a/contrib/pgcrypto/pgcrypto.c b/contrib/pgcrypto/pgcrypto.c
index 27b96c7cc4..d815de3073 100644
--- a/contrib/pgcrypto/pgcrypto.c
+++ b/contrib/pgcrypto/pgcrypto.c
@@ -34,6 +34,7 @@
#include <ctype.h>
#include "parser/scansup.h"
+#include "utils/backend_random.h"
#include "utils/builtins.h"
#include "utils/uuid.h"
@@ -422,7 +423,7 @@ PG_FUNCTION_INFO_V1(pg_random_bytes);
Datum
pg_random_bytes(PG_FUNCTION_ARGS)
{
- int err;
+#ifdef HAVE_STRONG_RANDOM
int len = PG_GETARG_INT32(0);
bytea *res;
@@ -435,13 +436,13 @@ pg_random_bytes(PG_FUNCTION_ARGS)
SET_VARSIZE(res, VARHDRSZ + len);
/* generate result */
- err = px_get_random_bytes((uint8 *) VARDATA(res), len);
- if (err < 0)
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("Random generator error: %s", px_strerror(err))));
+ if (!pg_strong_random(VARDATA(res), len))
+ px_THROW_ERROR(PXE_NO_RANDOM);
PG_RETURN_BYTEA_P(res);
+#else
+ px_THROW_ERROR(PXE_NO_RANDOM);
+#endif
}
/* SQL function: gen_random_uuid() returns uuid */
@@ -451,14 +452,14 @@ Datum
pg_random_uuid(PG_FUNCTION_ARGS)
{
uint8 *buf = (uint8 *) palloc(UUID_LEN);
- int err;
- /* generate random bits */
- err = px_get_random_bytes(buf, UUID_LEN);
- if (err < 0)
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("Random generator error: %s", px_strerror(err))));
+ /*
+	 * Generate random bits. pg_backend_random() will do here; we don't
+	 * promise UUIDs to be cryptographically random when built with
+	 * --disable-strong-random.
+ */
+ if (!pg_backend_random((char *) buf, UUID_LEN))
+ px_THROW_ERROR(PXE_NO_RANDOM);
/*
* Set magic numbers for a "version 4" (pseudorandom) UUID, see
diff --git a/contrib/pgcrypto/pgp-encrypt.c b/contrib/pgcrypto/pgp-encrypt.c
index c9148fd2fc..be933bf86c 100644
--- a/contrib/pgcrypto/pgp-encrypt.c
+++ b/contrib/pgcrypto/pgp-encrypt.c
@@ -37,6 +37,8 @@
#include "px.h"
#include "pgp.h"
+#include "utils/backend_random.h"
+
#define MDC_DIGEST_LEN 20
#define STREAM_ID 0xE0
@@ -477,14 +479,14 @@ init_encdata_packet(PushFilter **pf_res, PGP_Context *ctx, PushFilter *dst)
static int
write_prefix(PGP_Context *ctx, PushFilter *dst)
{
+#ifdef HAVE_STRONG_RANDOM
uint8 prefix[PGP_MAX_BLOCK + 2];
int res,
bs;
bs = pgp_get_cipher_block_size(ctx->cipher_algo);
- res = px_get_random_bytes(prefix, bs);
- if (res < 0)
- return res;
+ if (!pg_backend_random((char *) prefix, bs))
+ return PXE_NO_RANDOM;
prefix[bs + 0] = prefix[bs - 2];
prefix[bs + 1] = prefix[bs - 1];
@@ -492,6 +494,9 @@ write_prefix(PGP_Context *ctx, PushFilter *dst)
res = pushf_write(dst, prefix, bs + 2);
px_memset(prefix, 0, bs + 2);
return res < 0 ? res : 0;
+#else
+ return PXE_NO_RANDOM;
+#endif
}
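For context, the prefix written above follows the OpenPGP convention (RFC 4880, section 5.7): one cipher block of random octets followed by a repeat of its last two octets, which the decryptor uses as a quick session-key check. A sketch of the layout for a hypothetical 8-byte block cipher:

    r0 r1 r2 r3 r4 r5 r6 r7 | r6 r7
    \----- bs random ------/ \check/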
/*
@@ -578,14 +583,15 @@ init_s2k_key(PGP_Context *ctx)
static int
init_sess_key(PGP_Context *ctx)
{
- int res;
-
if (ctx->use_sess_key || ctx->pub_key)
{
+#ifdef HAVE_STRONG_RANDOM
ctx->sess_key_len = pgp_get_cipher_key_size(ctx->cipher_algo);
- res = px_get_random_bytes(ctx->sess_key, ctx->sess_key_len);
- if (res < 0)
- return res;
+ if (!pg_strong_random((char *) ctx->sess_key, ctx->sess_key_len))
+ return PXE_NO_RANDOM;
+#else
+ return PXE_NO_RANDOM;
+#endif
}
else
{
diff --git a/contrib/pgcrypto/pgp-mpi-internal.c b/contrib/pgcrypto/pgp-mpi-internal.c
index be95f2d092..cb70fcba6c 100644
--- a/contrib/pgcrypto/pgp-mpi-internal.c
+++ b/contrib/pgcrypto/pgp-mpi-internal.c
@@ -57,17 +57,16 @@ mp_clear_free(mpz_t *a)
static int
mp_px_rand(uint32 bits, mpz_t *res)
{
- int err;
+#ifdef HAVE_STRONG_RANDOM
unsigned bytes = (bits + 7) / 8;
int last_bits = bits & 7;
uint8 *buf;
buf = px_alloc(bytes);
- err = px_get_random_bytes(buf, bytes);
- if (err < 0)
+ if (!pg_strong_random((char *) buf, bytes))
{
px_free(buf);
- return err;
+ return PXE_NO_RANDOM;
}
/* clear unnecessary bits and set last bit to one */
@@ -84,6 +83,9 @@ mp_px_rand(uint32 bits, mpz_t *res)
px_free(buf);
return 0;
+#else
+ return PXE_NO_RANDOM;
+#endif
}
static void
diff --git a/contrib/pgcrypto/pgp-pgsql.c b/contrib/pgcrypto/pgp-pgsql.c
index 1f65b667ca..ce16db71d8 100644
--- a/contrib/pgcrypto/pgp-pgsql.c
+++ b/contrib/pgcrypto/pgp-pgsql.c
@@ -61,65 +61,6 @@ PG_FUNCTION_INFO_V1(pg_armor);
PG_FUNCTION_INFO_V1(pg_dearmor);
PG_FUNCTION_INFO_V1(pgp_armor_headers);
-/*
- * Mix a block of data into RNG.
- */
-static void
-add_block_entropy(PX_MD *md, text *data)
-{
- uint8 sha1[20];
-
- px_md_reset(md);
- px_md_update(md, (uint8 *) VARDATA(data), VARSIZE(data) - VARHDRSZ);
- px_md_finish(md, sha1);
-
- px_add_entropy(sha1, 20);
-
- px_memset(sha1, 0, 20);
-}
-
-/*
- * Mix user data into RNG. It is for user own interests to have
- * RNG state shuffled.
- */
-static void
-add_entropy(text *data1, text *data2, text *data3)
-{
- PX_MD *md;
- uint8 rnd[3];
-
- if (!data1 && !data2 && !data3)
- return;
-
- if (px_get_random_bytes(rnd, 3) < 0)
- return;
-
- if (px_find_digest("sha1", &md) < 0)
- return;
-
- /*
- * Try to make the feeding unpredictable.
- *
- * Prefer data over keys, as it's rather likely that key is same in
- * several calls.
- */
-
- /* chance: 7/8 */
- if (data1 && rnd[0] >= 32)
- add_block_entropy(md, data1);
-
- /* chance: 5/8 */
- if (data2 && rnd[1] >= 160)
- add_block_entropy(md, data2);
-
- /* chance: 5/8 */
- if (data3 && rnd[2] >= 160)
- add_block_entropy(md, data3);
-
- px_md_free(md);
- px_memset(rnd, 0, sizeof(rnd));
-}
-
/*
* returns src in case of no conversion or error
*/
@@ -432,11 +373,7 @@ init_work(PGP_Context **ctx_p, int is_text,
VARSIZE(args) - VARHDRSZ, ex);
if (err)
- {
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("%s", px_strerror(err))));
- }
+ px_THROW_ERROR(err);
if (ex->debug)
px_set_debug_handler(show_debug);
@@ -459,11 +396,6 @@ encrypt_internal(int is_pubenc, int is_text,
struct debug_expect ex;
text *tmp_data = NULL;
- /*
- * Add data and key info RNG.
- */
- add_entropy(data, key, NULL);
-
init_work(&ctx, is_text, args, &ex);
if (is_text && pgp_get_unicode_mode(ctx))
@@ -516,9 +448,7 @@ encrypt_internal(int is_pubenc, int is_text,
pgp_free(ctx);
mbuf_free(src);
mbuf_free(dst);
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("%s", px_strerror(err))));
+ px_THROW_ERROR(err);
}
/* res_len includes VARHDRSZ */
@@ -605,9 +535,7 @@ decrypt_internal(int is_pubenc, int need_text, text *data,
{
px_set_debug_handler(NULL);
mbuf_free(dst);
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("%s", px_strerror(err))));
+ px_THROW_ERROR(err);
}
res_len = mbuf_steal_data(dst, &restmp);
@@ -629,11 +557,6 @@ decrypt_internal(int is_pubenc, int need_text, text *data,
}
px_set_debug_handler(NULL);
- /*
- * add successful decryptions also into RNG
- */
- add_entropy(res, key, keypsw);
-
return res;
}
@@ -985,9 +908,7 @@ pg_dearmor(PG_FUNCTION_ARGS)
ret = pgp_armor_decode((uint8 *) VARDATA(data), data_len, &buf);
if (ret < 0)
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("%s", px_strerror(ret))));
+ px_THROW_ERROR(ret);
res = palloc(VARHDRSZ + buf.len);
SET_VARSIZE(res, VARHDRSZ + buf.len);
memcpy(VARDATA(res), buf.data, buf.len);
@@ -1041,9 +962,7 @@ pgp_armor_headers(PG_FUNCTION_ARGS)
&state->nheaders, &state->keys,
&state->values);
if (res < 0)
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("%s", px_strerror(res))));
+ px_THROW_ERROR(res);
MemoryContextSwitchTo(oldcontext);
funcctx->user_fctx = state;
@@ -1092,9 +1011,7 @@ pgp_key_id_w(PG_FUNCTION_ARGS)
res_len = pgp_get_keyid(buf, VARDATA(res));
mbuf_free(buf);
if (res_len < 0)
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
- errmsg("%s", px_strerror(res_len))));
+ px_THROW_ERROR(res_len);
SET_VARSIZE(res, VARHDRSZ + res_len);
PG_FREE_IF_COPY(data, 0);
diff --git a/contrib/pgcrypto/pgp-pubenc.c b/contrib/pgcrypto/pgp-pubenc.c
index 3b43bb61c0..4439876664 100644
--- a/contrib/pgcrypto/pgp-pubenc.c
+++ b/contrib/pgcrypto/pgp-pubenc.c
@@ -39,7 +39,7 @@
static int
pad_eme_pkcs1_v15(uint8 *data, int data_len, int res_len, uint8 **res_p)
{
- int res;
+#ifdef HAVE_STRONG_RANDOM
uint8 *buf,
*p;
int pad_len = res_len - 2 - data_len;
@@ -49,11 +49,11 @@ pad_eme_pkcs1_v15(uint8 *data, int data_len, int res_len, uint8 **res_p)
buf = px_alloc(res_len);
buf[0] = 0x02;
- res = px_get_random_bytes(buf + 1, pad_len);
- if (res < 0)
+
+ if (!pg_strong_random((char *) buf + 1, pad_len))
{
px_free(buf);
- return res;
+ return PXE_NO_RANDOM;
}
/* pad must not contain zero bytes */
@@ -62,26 +62,26 @@ pad_eme_pkcs1_v15(uint8 *data, int data_len, int res_len, uint8 **res_p)
{
if (*p == 0)
{
- res = px_get_random_bytes(p, 1);
- if (res < 0)
+ if (!pg_strong_random((char *) p, 1))
+ {
+ px_memset(buf, 0, res_len);
+ px_free(buf);
break;
+ }
}
if (*p != 0)
p++;
}
- if (res < 0)
- {
- px_memset(buf, 0, res_len);
- px_free(buf);
- return res;
- }
-
buf[pad_len + 1] = 0;
memcpy(buf + pad_len + 2, data, data_len);
*res_p = buf;
return 0;
+
+#else
+ return PXE_NO_RANDOM;
+#endif
}
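For reference, the buffer assembled above is the EME-PKCS1-v1_5 encryption padding of RFC 3447:

    EM = 0x00 || 0x02 || PS || 0x00 || M

where PS is pad_len nonzero random octets. The leading 0x00 octet is presumably left implicit here because the result is consumed as an MPI, which drops leading zeroes; the loop over p re-draws any zero octet one byte at a time until the whole pad is nonzero.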
static int
diff --git a/contrib/pgcrypto/pgp-s2k.c b/contrib/pgcrypto/pgp-s2k.c
index 3551d44d62..a0fd8969ef 100644
--- a/contrib/pgcrypto/pgp-s2k.c
+++ b/contrib/pgcrypto/pgp-s2k.c
@@ -34,6 +34,8 @@
#include "px.h"
#include "pgp.h"
+#include "utils/backend_random.h"
+
static int
calc_s2k_simple(PGP_S2K *s2k, PX_MD *md, const uint8 *key,
unsigned key_len)
@@ -233,15 +235,14 @@ pgp_s2k_fill(PGP_S2K *s2k, int mode, int digest_algo, int count)
case PGP_S2K_SIMPLE:
break;
case PGP_S2K_SALTED:
- res = px_get_random_bytes(s2k->salt, PGP_S2K_SALT);
+ if (!pg_backend_random((char *) s2k->salt, PGP_S2K_SALT))
+ return PXE_NO_RANDOM;
break;
case PGP_S2K_ISALTED:
- res = px_get_random_bytes(s2k->salt, PGP_S2K_SALT);
- if (res < 0)
- break;
- res = px_get_random_bytes(&tmp, 1);
- if (res < 0)
- break;
+ if (!pg_backend_random((char *) s2k->salt, PGP_S2K_SALT))
+ return PXE_NO_RANDOM;
+ if (!pg_backend_random((char *) &tmp, 1))
+ return PXE_NO_RANDOM;
s2k->iter = decide_s2k_iter(tmp, count);
break;
default:
diff --git a/contrib/pgcrypto/px-crypt.c b/contrib/pgcrypto/px-crypt.c
index 3d42393850..6c72c4ae83 100644
--- a/contrib/pgcrypto/px-crypt.c
+++ b/contrib/pgcrypto/px-crypt.c
@@ -34,6 +34,7 @@
#include "px.h"
#include "px-crypt.h"
+#include "utils/backend_random.h"
static char *
run_crypt_des(const char *psw, const char *salt,
@@ -132,7 +133,6 @@ static struct generator gen_list[] = {
int
px_gen_salt(const char *salt_type, char *buf, int rounds)
{
- int res;
struct generator *g;
char *p;
char rbuf[16];
@@ -153,9 +153,8 @@ px_gen_salt(const char *salt_type, char *buf, int rounds)
return PXE_BAD_SALT_ROUNDS;
}
- res = px_get_random_bytes((uint8 *) rbuf, g->input_len);
- if (res < 0)
- return res;
+ if (!pg_backend_random(rbuf, g->input_len))
+ return PXE_NO_RANDOM;
p = g->gen(rounds, rbuf, g->input_len, buf, PX_MAX_SALT_LEN);
px_memset(rbuf, 0, sizeof(rbuf));
diff --git a/contrib/pgcrypto/px.c b/contrib/pgcrypto/px.c
index b01701ea75..a5c02f3612 100644
--- a/contrib/pgcrypto/px.c
+++ b/contrib/pgcrypto/px.c
@@ -51,7 +51,6 @@ static const struct error_desc px_err_list[] = {
{PXE_CIPHER_INIT, "Cipher cannot be initialized ?"},
{PXE_HASH_UNUSABLE_FOR_HMAC, "This hash algorithm is unusable for HMAC"},
{PXE_DEV_READ_ERROR, "Error reading from random device"},
- {PXE_OSSL_RAND_ERROR, "OpenSSL PRNG error"},
{PXE_BUG, "pgcrypto bug"},
{PXE_ARGUMENT_ERROR, "Illegal argument to function"},
{PXE_UNKNOWN_SALT_ALGO, "Unknown salt algorithm"},
@@ -86,6 +85,39 @@ static const struct error_desc px_err_list[] = {
{0, NULL},
};
+/*
+ * Call ereport(ERROR, ...), with an error code and message corresponding to
+ * the PXE_* error code given as argument.
+ *
+ * This is similar to px_strerror(err), but for some errors, we fill in the
+ * error code and detail fields more appropriately.
+ */
+void
+px_THROW_ERROR(int err)
+{
+ if (err == PXE_NO_RANDOM)
+ {
+#ifdef HAVE_STRONG_RANDOM
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not generate a random number")));
+#else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("pg_random_bytes() is not supported by this build"),
+ errdetail("This functionality requires a source of strong random numbers"),
+ errhint("You need to rebuild PostgreSQL using --enable-strong-random")));
+#endif
+ }
+ else
+ {
+ /* For other errors, use the message from the above list. */
+ ereport(ERROR,
+ (errcode(ERRCODE_EXTERNAL_ROUTINE_INVOCATION_EXCEPTION),
+ errmsg("%s", px_strerror(err))));
+ }
+}
+
const char *
px_strerror(int err)
{
diff --git a/contrib/pgcrypto/px.h b/contrib/pgcrypto/px.h
index 800c552bf6..00fc6f0c01 100644
--- a/contrib/pgcrypto/px.h
+++ b/contrib/pgcrypto/px.h
@@ -71,7 +71,6 @@ void px_free(void *p);
#define PXE_CIPHER_INIT -8
#define PXE_HASH_UNUSABLE_FOR_HMAC -9
#define PXE_DEV_READ_ERROR -10
-#define PXE_OSSL_RAND_ERROR -11
#define PXE_BUG -12
#define PXE_ARGUMENT_ERROR -13
#define PXE_UNKNOWN_SALT_ALGO -14
@@ -189,11 +188,7 @@ int px_find_hmac(const char *name, PX_HMAC **res);
int px_find_cipher(const char *name, PX_Cipher **res);
int px_find_combo(const char *name, PX_Combo **res);
-int px_get_random_bytes(uint8 *dst, unsigned count);
-int px_add_entropy(const uint8 *data, unsigned count);
-
-unsigned px_acquire_system_randomness(uint8 *dst);
-
+void px_THROW_ERROR(int err) pg_attribute_noreturn();
const char *px_strerror(int err);
const char *px_resolve_alias(const PX_Alias *aliases, const char *name);
diff --git a/contrib/pgcrypto/random.c b/contrib/pgcrypto/random.c
deleted file mode 100644
index d72679e412..0000000000
--- a/contrib/pgcrypto/random.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * random.c
- * Acquire randomness from system. For seeding RNG.
- *
- * Copyright (c) 2001 Marko Kreen
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * contrib/pgcrypto/random.c
- */
-
-#include "postgres.h"
-
-#include "px.h"
-#include "utils/memdebug.h"
-
-/* how many bytes to ask from system random provider */
-#define RND_BYTES 32
-
-/*
- * Try to read from /dev/urandom or /dev/random on these OS'es.
- *
- * The list can be pretty liberal, as the device not existing
- * is an expected event.
- */
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) \
- || defined(__NetBSD__) || defined(__DragonFly__) \
- || defined(__darwin__) || defined(__SOLARIS__) \
- || defined(__hpux) || defined(__HPUX__) \
- || defined(__CYGWIN__) || defined(_AIX)
-
-#define TRY_DEV_RANDOM
-
-#include <fcntl.h>
-#include <unistd.h>
-
-static int
-safe_read(int fd, void *buf, size_t count)
-{
- int done = 0;
- char *p = buf;
- int res;
-
- while (count)
- {
- res = read(fd, p, count);
- if (res <= 0)
- {
- if (errno == EINTR)
- continue;
- return PXE_DEV_READ_ERROR;
- }
- p += res;
- done += res;
- count -= res;
- }
- return done;
-}
-
-static uint8 *
-try_dev_random(uint8 *dst)
-{
- int fd;
- int res;
-
- fd = open("/dev/urandom", O_RDONLY, 0);
- if (fd == -1)
- {
- fd = open("/dev/random", O_RDONLY, 0);
- if (fd == -1)
- return dst;
- }
- res = safe_read(fd, dst, RND_BYTES);
- close(fd);
- if (res > 0)
- dst += res;
- return dst;
-}
-#endif
-
-/*
- * Try to find randomness on Windows
- */
-#ifdef WIN32
-
-#define TRY_WIN32_GENRAND
-#define TRY_WIN32_PERFC
-
-#include <windows.h>
-#include <wincrypt.h>
-
-/*
- * this function is from libtomcrypt
- *
- * try to use Microsoft crypto API
- */
-static uint8 *
-try_win32_genrand(uint8 *dst)
-{
- int res;
- HCRYPTPROV h = 0;
-
- res = CryptAcquireContext(&h, NULL, MS_DEF_PROV, PROV_RSA_FULL,
- (CRYPT_VERIFYCONTEXT | CRYPT_MACHINE_KEYSET));
- if (!res)
- res = CryptAcquireContext(&h, NULL, MS_DEF_PROV, PROV_RSA_FULL,
- CRYPT_VERIFYCONTEXT | CRYPT_MACHINE_KEYSET | CRYPT_NEWKEYSET);
- if (!res)
- return dst;
-
- res = CryptGenRandom(h, RND_BYTES, dst);
- if (res == TRUE)
- dst += RND_BYTES;
-
- CryptReleaseContext(h, 0);
- return dst;
-}
-
-static uint8 *
-try_win32_perfc(uint8 *dst)
-{
- int res;
- LARGE_INTEGER time;
-
- res = QueryPerformanceCounter(&time);
- if (!res)
- return dst;
-
- memcpy(dst, &time, sizeof(time));
- return dst + sizeof(time);
-}
-#endif /* WIN32 */
-
-
-/*
- * If we are not on Windows, then hopefully we are
- * on a unix-like system. Use the usual suspects
- * for randomness.
- */
-#ifndef WIN32
-
-#define TRY_UNIXSTD
-
-#include
-#include
-#include
-#include
-
-/*
- * Everything here is predictable; it only needs some patience.
- *
- * But there is a chance that the system-specific functions
- * did not work. So keep faith and try to slow the attacker down.
- */
-static uint8 *
-try_unix_std(uint8 *dst)
-{
- pid_t pid;
- int x;
- PX_MD *md;
- struct timeval tv;
- int res;
-
- /* process id */
- pid = getpid();
- memcpy(dst, (uint8 *) &pid, sizeof(pid));
- dst += sizeof(pid);
-
- /* time */
- gettimeofday(&tv, NULL);
- memcpy(dst, (uint8 *) &tv, sizeof(tv));
- dst += sizeof(tv);
-
- /* pointless, but should not hurt */
- x = random();
- memcpy(dst, (uint8 *) &x, sizeof(x));
- dst += sizeof(x);
-
- /* hash of uninitialized stack and heap allocations */
- res = px_find_digest("sha1", &md);
- if (res >= 0)
- {
- uint8 *ptr;
- uint8 stack[8192];
- int alloc = 32 * 1024;
-
- VALGRIND_MAKE_MEM_DEFINED(stack, sizeof(stack));
- px_md_update(md, stack, sizeof(stack));
- ptr = px_alloc(alloc);
- VALGRIND_MAKE_MEM_DEFINED(ptr, alloc);
- px_md_update(md, ptr, alloc);
- px_free(ptr);
-
- px_md_finish(md, dst);
- px_md_free(md);
-
- dst += 20;
- }
-
- return dst;
-}
-#endif
-
-/*
- * try to extract some randomness for initial seeding
- *
- * dst should have room for 1024 bytes.
- */
-unsigned
-px_acquire_system_randomness(uint8 *dst)
-{
- uint8 *p = dst;
-
-#ifdef TRY_DEV_RANDOM
- p = try_dev_random(p);
-#endif
-#ifdef TRY_WIN32_GENRAND
- p = try_win32_genrand(p);
-#endif
-#ifdef TRY_WIN32_PERFC
- p = try_win32_perfc(p);
-#endif
-#ifdef TRY_UNIXSTD
- p = try_unix_std(p);
-#endif
- return p - dst;
-}
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 296611d425..98594a487e 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1096,6 +1096,23 @@ su - postgres
+
+
--disable-strong-random
+
+
+ Allow the build to succeed even if PostgreSQL>
+ has no support for strong random numbers on the platform.
+ A source of random numbers is needed for some authentication
+ protocols, as well as some routines in the pgcrypto
+ module. --disable-strong-random disables functionality that
+ requires cryptographically strong random numbers, and substitutes
+ a weak pseudo-random-number generator for the generation of
+ authentication salt values and query cancel keys. It may make
+ authentication less secure.
+
+
+
+
--disable-thread-safety
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index aa1fa658ed..d39d6ca867 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -197,6 +197,7 @@ enable_dtrace = @enable_dtrace@
enable_coverage = @enable_coverage@
enable_tap_tests = @enable_tap_tests@
enable_thread_safety = @enable_thread_safety@
+enable_strong_random = @enable_strong_random@
python_includespec = @python_includespec@
python_libdir = @python_libdir@
diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c
index 0ba8530114..5d166db574 100644
--- a/src/backend/libpq/auth.c
+++ b/src/backend/libpq/auth.c
@@ -33,6 +33,7 @@
#include "miscadmin.h"
#include "replication/walsender.h"
#include "storage/ipc.h"
+#include "utils/backend_random.h"
/*----------------------------------------------------------------
@@ -43,9 +44,21 @@ static void sendAuthRequest(Port *port, AuthRequest areq, char *extradata,
int extralen);
static void auth_failed(Port *port, int status, char *logdetail);
static char *recv_password_packet(Port *port);
-static int recv_and_check_password_packet(Port *port, char **logdetail);
+/*----------------------------------------------------------------
+ * MD5 authentication
+ *----------------------------------------------------------------
+ */
+static int CheckMD5Auth(Port *port, char **logdetail);
+
+/*----------------------------------------------------------------
+ * Plaintext password authentication
+ *----------------------------------------------------------------
+ */
+
+static int CheckPasswordAuth(Port *port, char **logdetail);
+
/*----------------------------------------------------------------
* Ident authentication
*----------------------------------------------------------------
@@ -536,13 +549,11 @@ ClientAuthentication(Port *port)
(errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
errmsg("MD5 authentication is not supported when \"db_user_namespace\" is enabled")));
/* include the salt to use for computing the response */
- sendAuthRequest(port, AUTH_REQ_MD5, port->md5Salt, 4);
- status = recv_and_check_password_packet(port, &logdetail);
+ status = CheckMD5Auth(port, &logdetail);
break;
case uaPassword:
- sendAuthRequest(port, AUTH_REQ_PASSWORD, NULL, 0);
- status = recv_and_check_password_packet(port, &logdetail);
+ status = CheckPasswordAuth(port, &logdetail);
break;
case uaPAM:
@@ -696,23 +707,48 @@ recv_password_packet(Port *port)
*----------------------------------------------------------------
*/
-/*
- * Called when we have sent an authorization request for a password.
- * Get the response and check it.
- * On error, optionally store a detail string at *logdetail.
+static int
+CheckMD5Auth(Port *port, char **logdetail)
+{
+ char md5Salt[4]; /* Password salt */
+ char *passwd;
+ int result;
+
+ pg_backend_random(md5Salt, 4);
+
+ sendAuthRequest(port, AUTH_REQ_MD5, md5Salt, 4);
+
+ passwd = recv_password_packet(port);
+
+ if (passwd == NULL)
+ return STATUS_EOF; /* client wouldn't send password */
+
+ result = md5_crypt_verify(port, port->user_name, passwd, md5Salt, 4, logdetail);
+
+ pfree(passwd);
+
+ return result;
+}
+
+/*----------------------------------------------------------------
+ * Plaintext password authentication
+ *----------------------------------------------------------------
*/
+
static int
-recv_and_check_password_packet(Port *port, char **logdetail)
+CheckPasswordAuth(Port *port, char **logdetail)
{
char *passwd;
int result;
+ sendAuthRequest(port, AUTH_REQ_PASSWORD, NULL, 0);
+
passwd = recv_password_packet(port);
if (passwd == NULL)
return STATUS_EOF; /* client wouldn't send password */
- result = md5_crypt_verify(port, port->user_name, passwd, logdetail);
+ result = md5_crypt_verify(port, port->user_name, passwd, NULL, 0, logdetail);
pfree(passwd);
@@ -920,7 +956,7 @@ pg_GSS_recvauth(Port *port)
(unsigned int) port->gss->outbuf.length);
sendAuthRequest(port, AUTH_REQ_GSS_CONT,
- port->gss->outbuf.value, port->gss->outbuf.length);
+ port->gss->outbuf.value, port->gss->outbuf.length);
gss_release_buffer(&lmin_s, &port->gss->outbuf);
}
@@ -1166,7 +1202,7 @@ pg_SSPI_recvauth(Port *port)
port->gss->outbuf.value = outbuf.pBuffers[0].pvBuffer;
sendAuthRequest(port, AUTH_REQ_GSS_CONT,
- port->gss->outbuf.value, port->gss->outbuf.length);
+ port->gss->outbuf.value, port->gss->outbuf.length);
FreeContextBuffer(outbuf.pBuffers[0].pvBuffer);
}
diff --git a/src/backend/libpq/crypt.c b/src/backend/libpq/crypt.c
index d84a180330..35b657adbb 100644
--- a/src/backend/libpq/crypt.c
+++ b/src/backend/libpq/crypt.c
@@ -36,7 +36,7 @@
*/
int
md5_crypt_verify(const Port *port, const char *role, char *client_pass,
- char **logdetail)
+ char *md5_salt, int md5_salt_len, char **logdetail)
{
int retval = STATUS_ERROR;
char *shadow_pass,
@@ -91,13 +91,14 @@ md5_crypt_verify(const Port *port, const char *role, char *client_pass,
switch (port->hba->auth_method)
{
case uaMD5:
+ Assert(md5_salt != NULL && md5_salt_len > 0);
crypt_pwd = palloc(MD5_PASSWD_LEN + 1);
if (isMD5(shadow_pass))
{
/* stored password already encrypted, only do salt */
if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
- port->md5Salt,
- sizeof(port->md5Salt), crypt_pwd))
+ md5_salt, md5_salt_len,
+ crypt_pwd))
{
pfree(crypt_pwd);
return STATUS_ERROR;
@@ -118,8 +119,7 @@ md5_crypt_verify(const Port *port, const char *role, char *client_pass,
return STATUS_ERROR;
}
if (!pg_md5_encrypt(crypt_pwd2 + strlen("md5"),
- port->md5Salt,
- sizeof(port->md5Salt),
+ md5_salt, md5_salt_len,
crypt_pwd))
{
pfree(crypt_pwd);
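As a sketch of the scheme verified above, assuming (as the call sites in this hunk suggest) that pg_md5_encrypt(input, salt, salt_len, out) produces the string "md5" followed by hex(MD5(input || salt)); the role name "alice" and password "secret" are only illustrative:

    char	stored[MD5_PASSWD_LEN + 1];		/* form kept in pg_authid */
    char	expected[MD5_PASSWD_LEN + 1];	/* what the client must send */

    /* stored form: password hashed together with the role name */
    pg_md5_encrypt("secret", "alice", strlen("alice"), stored);

    /* challenge response: stored hash (minus the "md5" tag) plus the salt */
    pg_md5_encrypt(stored + strlen("md5"), md5_salt, md5_salt_len, expected);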
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 24add74512..f0ed523371 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -164,7 +164,7 @@
typedef struct bkend
{
pid_t pid; /* process id of backend */
- long cancel_key; /* cancel key for cancels for this backend */
+ int32 cancel_key; /* cancel key for cancels for this backend */
int child_slot; /* PMChildSlot for this backend, if any */
/*
@@ -358,13 +358,15 @@ static volatile bool avlauncher_needs_signal = false;
static volatile bool StartWorkerNeeded = true;
static volatile bool HaveCrashedWorker = false;
+#ifndef HAVE_STRONG_RANDOM
/*
- * State for assigning random salts and cancel keys.
+ * State for assigning cancel keys.
* Also, the global MyCancelKey passes the cancel key assigned to a given
* backend from the postmaster to that backend (via fork).
*/
static unsigned int random_seed = 0;
static struct timeval random_start_time;
+#endif
#ifdef USE_BONJOUR
static DNSServiceRef bonjour_sdref = NULL;
@@ -403,8 +405,7 @@ static void processCancelRequest(Port *port, void *pkt);
static int initMasks(fd_set *rmask);
static void report_fork_failure_to_client(Port *port, int errnum);
static CAC_state canAcceptConnections(void);
-static long PostmasterRandom(void);
-static void RandomSalt(char *salt, int len);
+static bool RandomCancelKey(int32 *cancel_key);
static void signal_child(pid_t pid, int signal);
static bool SignalSomeChildren(int signal, int targets);
static void TerminateChildren(int signal);
@@ -471,7 +472,7 @@ typedef struct
InheritableSocket portsocket;
char DataDir[MAXPGPATH];
pgsocket ListenSocket[MAXLISTEN];
- long MyCancelKey;
+ int32 MyCancelKey;
int MyPMChildSlot;
#ifndef WIN32
unsigned long UsedShmemSegID;
@@ -1292,8 +1293,10 @@ PostmasterMain(int argc, char *argv[])
* Remember postmaster startup time
*/
PgStartTime = GetCurrentTimestamp();
- /* PostmasterRandom wants its own copy */
+#ifndef HAVE_STRONG_RANDOM
+ /* RandomCancelKey wants its own copy */
gettimeofday(&random_start_time, NULL);
+#endif
/*
* We're ready to rock and roll...
@@ -2344,15 +2347,6 @@ ConnCreate(int serverFd)
return NULL;
}
- /*
- * Precompute password salt values to use for this connection. It's
- * slightly annoying to do this long in advance of knowing whether we'll
- * need 'em or not, but we must do the random() calls before we fork, not
- * after. Else the postmaster's random sequence won't get advanced, and
- * all backends would end up using the same salt...
- */
- RandomSalt(port->md5Salt, sizeof(port->md5Salt));
-
/*
* Allocate GSSAPI specific state struct
*/
@@ -3905,7 +3899,14 @@ BackendStartup(Port *port)
* backend will have its own copy in the forked-off process' value of
* MyCancelKey, so that it can transmit the key to the frontend.
*/
- MyCancelKey = PostmasterRandom();
+ if (!RandomCancelKey(&MyCancelKey))
+ {
+ ereport(LOG,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not acquire random number")));
+ return STATUS_ERROR;
+ }
+
bn->cancel_key = MyCancelKey;
/* Pass down canAcceptConnections state */
@@ -4218,8 +4219,10 @@ BackendRun(Port *port)
* generator state. We have to clobber the static random_seed *and* start
* a new random sequence in the random() library function.
*/
+#ifndef HAVE_STRONG_RANDOM
random_seed = 0;
random_start_time.tv_usec = 0;
+#endif
/* slightly hacky way to convert timestamptz into integers */
TimestampDifference(0, port->SessionStartTime, &secs, &usecs);
srandom((unsigned int) (MyProcPid ^ (usecs << 12) ^ secs));
@@ -5068,63 +5071,42 @@ StartupPacketTimeoutHandler(void)
/*
- * RandomSalt
+ * Generate a random cancel key.
*/
-static void
-RandomSalt(char *salt, int len)
+static bool
+RandomCancelKey(int32 *cancel_key)
{
- long rand;
- int i;
-
+#ifdef HAVE_STRONG_RANDOM
+ return pg_strong_random((char *) cancel_key, sizeof(int32));
+#else
/*
- * We use % 255, sacrificing one possible byte value, so as to ensure that
- * all bits of the random() value participate in the result. While at it,
- * add one to avoid generating any null bytes.
+ * If built with --disable-strong-random, use plain old erand48.
+ *
+ * We cannot use pg_backend_random() in postmaster, because it stores
+ * its state in shared memory.
*/
- for (i = 0; i < len; i++)
- {
- rand = PostmasterRandom();
- salt[i] = (rand % 255) + 1;
- }
-}
+ static unsigned short seed[3];
-/*
- * PostmasterRandom
- *
- * Caution: use this only for values needed during connection-request
- * processing. Otherwise, the intended property of having an unpredictable
- * delay between random_start_time and random_stop_time will be broken.
- */
-static long
-PostmasterRandom(void)
-{
/*
* Select a random seed at the time of first receiving a request.
*/
if (random_seed == 0)
{
- do
- {
- struct timeval random_stop_time;
+ struct timeval random_stop_time;
- gettimeofday(&random_stop_time, NULL);
+ gettimeofday(&random_stop_time, NULL);
- /*
- * We are not sure how much precision is in tv_usec, so we swap
- * the high and low 16 bits of 'random_stop_time' and XOR them
- * with 'random_start_time'. On the off chance that the result is
- * 0, we loop until it isn't.
- */
- random_seed = random_start_time.tv_usec ^
- ((random_stop_time.tv_usec << 16) |
- ((random_stop_time.tv_usec >> 16) & 0xffff));
- }
- while (random_seed == 0);
+ seed[0] = (unsigned short) random_start_time.tv_usec;
+ seed[1] = (unsigned short) (random_stop_time.tv_usec) ^ (random_start_time.tv_usec >> 16);
+ seed[2] = (unsigned short) (random_stop_time.tv_usec >> 16);
- srandom(random_seed);
+ random_seed = 1;
}
- return random();
+ *cancel_key = pg_jrand48(seed);
+
+ return true;
+#endif
}
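A minimal sketch of the fallback path in isolation (seed values hypothetical; pg_jrand48() is the jrand48-style generator declared in src/include/port.h later in this patch, returning a signed 32-bit value):

    unsigned short seed[3] = { 0x330e, 0x1234, 0xabcd };
    int32	key1 = (int32) pg_jrand48(seed);	/* advances the 48-bit state */
    int32	key2 = (int32) pg_jrand48(seed);	/* a different key */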
/*
@@ -5295,16 +5277,23 @@ StartAutovacuumWorker(void)
*/
if (canAcceptConnections() == CAC_OK)
{
+ /*
+ * Compute the cancel key that will be assigned to this session.
+ * We probably don't need cancel keys for autovac workers, but
+ * we'd better have something random in the field to prevent
+ * unfriendly people from sending cancels to them.
+ */
+ if (!RandomCancelKey(&MyCancelKey))
+ {
+ ereport(LOG,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not acquire random number")));
+ return;
+ }
+
bn = (Backend *) malloc(sizeof(Backend));
if (bn)
{
- /*
- * Compute the cancel key that will be assigned to this session.
- * We probably don't need cancel keys for autovac workers, but
- * we'd better have something random in the field to prevent
- * unfriendly people from sending cancels to them.
- */
- MyCancelKey = PostmasterRandom();
bn->cancel_key = MyCancelKey;
/* Autovac workers are not dead_end and need a child slot */
@@ -5592,8 +5581,25 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
static bool
assign_backendlist_entry(RegisteredBgWorker *rw)
{
- Backend *bn = malloc(sizeof(Backend));
+ Backend *bn;
+ /*
+ * Compute the cancel key that will be assigned to this session. We
+ * probably don't need cancel keys for background workers, but we'd better
+ * have something random in the field to prevent unfriendly people from
+ * sending cancels to them.
+ */
+ if (!RandomCancelKey(&MyCancelKey))
+ {
+ ereport(LOG,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not acquire random number")));
+
+ rw->rw_crashed_at = GetCurrentTimestamp();
+ return false;
+ }
+
+ bn = malloc(sizeof(Backend));
if (bn == NULL)
{
ereport(LOG,
@@ -5610,15 +5616,7 @@ assign_backendlist_entry(RegisteredBgWorker *rw)
return false;
}
- /*
- * Compute the cancel key that will be assigned to this session. We
- * probably don't need cancel keys for background workers, but we'd better
- * have something random in the field to prevent unfriendly people from
- * sending cancels to them.
- */
- MyCancelKey = PostmasterRandom();
bn->cancel_key = MyCancelKey;
-
bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
bn->bkend_type = BACKEND_TYPE_BGWORKER;
bn->dead_end = false;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index c04b17fa8e..01bddcea16 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -43,6 +43,7 @@
#include "storage/procsignal.h"
#include "storage/sinvaladt.h"
#include "storage/spin.h"
+#include "utils/backend_random.h"
#include "utils/snapmgr.h"
@@ -141,6 +142,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
+ size = add_size(size, BackendRandomShmemSize());
#ifdef EXEC_BACKEND
size = add_size(size, ShmemBackendArraySize());
#endif
@@ -253,6 +255,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
BTreeShmemInit();
SyncScanShmemInit();
AsyncShmemInit();
+ BackendRandomShmemInit();
#ifdef EXEC_BACKEND
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index f8996cd21a..0dcf7effd4 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -47,3 +47,4 @@ CommitTsLock 39
ReplicationOriginLock 40
MultiXactTruncationLock 41
OldSnapshotTimeMapLock 42
+BackendRandomLock 43
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index c564ae396d..6ab03cea17 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -38,7 +38,7 @@ volatile uint32 CritSectionCount = 0;
int MyProcPid;
pg_time_t MyStartTime;
struct Port *MyProcPort;
-long MyCancelKey;
+int32 MyCancelKey;
int MyPMChildSlot;
/*
diff --git a/src/backend/utils/misc/Makefile b/src/backend/utils/misc/Makefile
index a5b487d0b6..0ad1b8b595 100644
--- a/src/backend/utils/misc/Makefile
+++ b/src/backend/utils/misc/Makefile
@@ -14,8 +14,9 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
-OBJS = guc.o help_config.o pg_config.o pg_controldata.o pg_rusage.o \
- ps_status.o rls.o sampling.o superuser.o timeout.o tzparser.o
+OBJS = backend_random.o guc.o help_config.o pg_config.o pg_controldata.o \
+ pg_rusage.o ps_status.o rls.o sampling.o superuser.o timeout.o \
+ tzparser.o
# This location might depend on the installation directories. Therefore
# we can't substitute it into pg_config.h.
diff --git a/src/backend/utils/misc/backend_random.c b/src/backend/utils/misc/backend_random.c
new file mode 100644
index 0000000000..1bc239d1dd
--- /dev/null
+++ b/src/backend/utils/misc/backend_random.c
@@ -0,0 +1,158 @@
+/*-------------------------------------------------------------------------
+ *
+ * backend_random.c
+ * Backend random number generation routine.
+ *
+ * The pg_backend_random() function fills a buffer with random bytes. Normally,
+ * it is just a thin wrapper around pg_strong_random(), but when compiled
+ * with --disable-strong-random, we provide a built-in implementation.
+ *
+ * This function is used for generating nonces in authentication, and for
+ * random salt generation in pgcrypto. The built-in implementation is not
+ * cryptographically strong, but if the user asked for it, we'll go ahead
+ * and use it anyway.
+ *
+ * The built-in implementation uses the standard erand48 algorithm, with
+ * a seed shared between all backends.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/misc/backend_random.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <sys/time.h>
+
+#include "miscadmin.h"
+#include "storage/lwlock.h"
+#include "storage/shmem.h"
+#include "utils/backend_random.h"
+#include "utils/timestamp.h"
+
+#ifdef HAVE_STRONG_RANDOM
+
+Size
+BackendRandomShmemSize(void)
+{
+ return 0;
+}
+
+void
+BackendRandomShmemInit(void)
+{
+ /* do nothing */
+}
+
+bool
+pg_backend_random(char *dst, int len)
+{
+ /* should not be called in postmaster */
+ Assert (IsUnderPostmaster || !IsPostmasterEnvironment);
+
+ return pg_strong_random(dst, len);
+}
+
+#else
+
+/*
+ * Seed for the PRNG, stored in shared memory.
+ *
+ * Protected by BackendRandomLock.
+ */
+typedef struct
+{
+ bool initialized;
+ unsigned short seed[3];
+} BackendRandomShmemStruct;
+
+static BackendRandomShmemStruct *BackendRandomShmem;
+
+Size
+BackendRandomShmemSize(void)
+{
+ return sizeof(BackendRandomShmemStruct);
+}
+
+void
+BackendRandomShmemInit(void)
+{
+ bool found;
+
+ BackendRandomShmem = (BackendRandomShmemStruct *)
+ ShmemInitStruct("Backend PRNG state",
+ BackendRandomShmemSize(),
+ &found);
+
+ if (!IsUnderPostmaster)
+ {
+ Assert(!found);
+
+ BackendRandomShmem->initialized = false;
+ }
+ else
+ Assert(found);
+}
+
+bool
+pg_backend_random(char *dst, int len)
+{
+ int i;
+ char *end = dst + len;
+
+ /* should not be called in postmaster */
+ Assert (IsUnderPostmaster || !IsPostmasterEnvironment);
+
+ LWLockAcquire(BackendRandomLock, LW_EXCLUSIVE);
+
+ /*
+ * Seed the PRNG on the first use.
+ */
+ if (!BackendRandomShmem->initialized)
+ {
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ BackendRandomShmem->seed[0] = now.tv_sec;
+ BackendRandomShmem->seed[1] = (unsigned short) (now.tv_usec);
+ BackendRandomShmem->seed[2] = (unsigned short) (now.tv_usec >> 16);
+
+ /*
+ * Mix in the cancel key, generated by the postmaster. This adds
+ * what little entropy the postmaster had to the seed.
+ */
+ BackendRandomShmem->seed[0] ^= (MyCancelKey);
+ BackendRandomShmem->seed[1] ^= (MyCancelKey >> 16);
+
+ BackendRandomShmem->initialized = true;
+ }
+
+ for (i = 0; dst < end; i++)
+ {
+ uint32 r;
+ int j;
+
+ /*
+ * pg_jrand48 returns a 32-bit integer. Fill the next 4 bytes from it.
+ */
+ r = (uint32) pg_jrand48(BackendRandomShmem->seed);
+
+ for (j = 0; j < 4 && dst < end; j++)
+ {
+ *(dst++) = (char) (r & 0xFF);
+ r >>= 8;
+ }
+ }
+ LWLockRelease(BackendRandomLock);
+
+ return true;
+}
+
+
+#endif /* HAVE_STRONG_RANDOM */
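A hypothetical caller, to illustrate the contract the authentication and pgcrypto changes above rely on (the error message is made up for the example):

    char	salt[4];

    if (!pg_backend_random(salt, sizeof(salt)))
        ereport(ERROR,
                (errcode(ERRCODE_INTERNAL_ERROR),
                 errmsg("could not generate random salt")));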
diff --git a/src/include/libpq/crypt.h b/src/include/libpq/crypt.h
index 5725bb409e..f51e0fd46b 100644
--- a/src/include/libpq/crypt.h
+++ b/src/include/libpq/crypt.h
@@ -16,6 +16,6 @@
#include "libpq/libpq-be.h"
extern int md5_crypt_verify(const Port *port, const char *role,
- char *client_pass, char **logdetail);
+ char *client_pass, char *md5_salt, int md5_salt_len, char **logdetail);
#endif
diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h
index b91eca5b2c..66647ad003 100644
--- a/src/include/libpq/libpq-be.h
+++ b/src/include/libpq/libpq-be.h
@@ -144,7 +144,6 @@ typedef struct Port
* Information that needs to be held during the authentication cycle.
*/
HbaLine *hba;
- char md5Salt[4]; /* Password salt */
/*
* Information that really has no business at all being in struct Port,
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index d06eca54b4..999440fdec 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -163,7 +163,7 @@ extern PGDLLIMPORT int MyProcPid;
extern PGDLLIMPORT pg_time_t MyStartTime;
extern PGDLLIMPORT struct Port *MyProcPort;
extern PGDLLIMPORT struct Latch *MyLatch;
-extern long MyCancelKey;
+extern int32 MyCancelKey;
extern int MyPMChildSlot;
extern char OutputFileName[];
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 7dbfa90bf4..42a3fc862e 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -497,6 +497,9 @@
/* Define to 1 if you have the `strlcpy' function. */
#undef HAVE_STRLCPY
+/* Define to 1 if you have a strong random number source */
+#undef HAVE_STRONG_RANDOM
+
/* Define to 1 if you have the `strtoll' function. */
#undef HAVE_STRTOLL
@@ -814,6 +817,9 @@
/* Define to 1 to build with BSD Authentication support. (--with-bsd-auth) */
#undef USE_BSD_AUTH
+/* Define to use /dev/urandom for random number generation */
+#undef USE_DEV_URANDOM
+
/* Define to 1 if you want float4 values to be passed by value.
(--enable-float4-byval) */
#undef USE_FLOAT4_BYVAL
@@ -842,6 +848,9 @@
/* Define to build with OpenSSL support. (--with-openssl) */
#undef USE_OPENSSL
+/* Define to use OpenSSL for random number generation */
+#undef USE_OPENSSL_RANDOM
+
/* Define to 1 to build with PAM support. (--with-pam) */
#undef USE_PAM
@@ -869,6 +878,9 @@
/* Define to select unnamed POSIX semaphores. */
#undef USE_UNNAMED_POSIX_SEMAPHORES
+/* Define to use native Windows API for random number generation */
+#undef USE_WIN32_RANDOM
+
/* Define to select Win32-style semaphores. */
#undef USE_WIN32_SEMAPHORES
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index 8892c3cb4f..ceb8b7956e 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -348,6 +348,9 @@
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
+/* Define to 1 if you have a strong random number source */
+#define HAVE_STRONG_RANDOM 1
+
/* Define to 1 if you have the `strtoll' function. */
//#define HAVE_STRTOLL 1
@@ -616,6 +619,9 @@
/* Define to 1 to build with BSD Authentication support. (--with-bsd-auth) */
/* #undef USE_BSD_AUTH */
+/* Define to use /dev/urandom for random number generation */
+/* #undef USE_DEV_URANDOM */
+
/* Define to 1 if you want 64-bit integer timestamp and interval support.
(--enable-integer-datetimes) */
/* #undef USE_INTEGER_DATETIMES */
@@ -629,6 +635,9 @@
/* Define to build with OpenSSL support. (--with-openssl) */
/* #undef USE_OPENSSL */
+/* Define to use OpenSSL for random number generation */
+/* #undef USE_OPENSSL_RANDOM */
+
/* Define to 1 to build with PAM support. (--with-pam) */
/* #undef USE_PAM */
@@ -657,6 +666,9 @@
/* Define to select unnamed POSIX semaphores. */
/* #undef USE_UNNAMED_POSIX_SEMAPHORES */
+/* Define to use native Windows API for random number generation */
+#define USE_WIN32_RANDOM 1
+
/* Define to select Win32-style semaphores. */
#define USE_WIN32_SEMAPHORES 1
diff --git a/src/include/port.h b/src/include/port.h
index 8a63958535..f2b9882b7b 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -361,6 +361,7 @@ extern off_t ftello(FILE *stream);
extern double pg_erand48(unsigned short xseed[3]);
extern long pg_lrand48(void);
+extern long pg_jrand48(unsigned short xseed[3]);
extern void pg_srand48(long seed);
#ifndef HAVE_FLS
@@ -454,6 +455,11 @@ extern int pg_codepage_to_encoding(UINT cp);
extern char *inet_net_ntop(int af, const void *src, int bits,
char *dst, size_t size);
+/* port/pg_strong_random.c */
+#ifdef HAVE_STRONG_RANDOM
+extern bool pg_strong_random(void *buf, size_t len);
+#endif
+
/* port/pgcheckdir.c */
extern int pg_check_dir(const char *dir);
diff --git a/src/include/utils/backend_random.h b/src/include/utils/backend_random.h
new file mode 100644
index 0000000000..16a6a26523
--- /dev/null
+++ b/src/include/utils/backend_random.h
@@ -0,0 +1,19 @@
+/*-------------------------------------------------------------------------
+ *
+ * backend_random.h
+ * Declarations for backend random number generation
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ *
+ * src/include/utils/backend_random.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef BACKEND_RANDOM_H
+#define BACKEND_RANDOM_H
+
+extern Size BackendRandomShmemSize(void);
+extern void BackendRandomShmemInit(void);
+extern bool pg_backend_random(char *dst, int len);
+
+#endif /* BACKEND_RANDOM_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index bc9b63add0..81f01b25bb 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -35,6 +35,10 @@ OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \
pgstrcasecmp.o pqsignal.o \
qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o
+ifeq ($(enable_strong_random), yes)
+OBJS += pg_strong_random.o
+endif
+
# foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND
OBJS_SRV = $(OBJS:%.o=%_srv.o)
diff --git a/src/port/erand48.c b/src/port/erand48.c
index 9d471197c3..716816bc36 100644
--- a/src/port/erand48.c
+++ b/src/port/erand48.c
@@ -91,6 +91,13 @@ pg_lrand48(void)
return ((long) _rand48_seed[2] << 15) + ((long) _rand48_seed[1] >> 1);
}
+long
+pg_jrand48(unsigned short xseed[3])
+{
+ _dorand48(xseed);
+ return ((long) xseed[2] << 16) + ((long) xseed[1]);
+}
+
void
pg_srand48(long seed)
{
diff --git a/src/port/pg_strong_random.c b/src/port/pg_strong_random.c
new file mode 100644
index 0000000000..6d3aa38efd
--- /dev/null
+++ b/src/port/pg_strong_random.c
@@ -0,0 +1,149 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_strong_random.c
+ * generate a cryptographically secure random number
+ *
+ * Our definition of "strong" is that it's suitable for generating random
+ * salts and query cancellation keys, during authentication.
+ *
+ * Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/port/pg_strong_random.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#ifdef USE_OPENSSL
+#include <openssl/rand.h>
+#endif
+#ifdef WIN32
+#include <wincrypt.h>
+#endif
+
+#ifdef WIN32
+/*
+ * Cache a global crypto provider that only gets freed when the process
+ * exits, in case we need random numbers more than once.
+ */
+static HCRYPTPROV hProvider = 0;
+#endif
+
+#if defined(USE_DEV_URANDOM)
+/*
+ * Read (random) bytes from a file.
+ */
+static bool
+random_from_file(char *filename, void *buf, size_t len)
+{
+ int f;
+ char *p = buf;
+ ssize_t res;
+
+ f = open(filename, O_RDONLY, 0);
+ if (f == -1)
+ return false;
+
+ while (len)
+ {
+ res = read(f, p, len);
+ if (res <= 0)
+ {
+ if (errno == EINTR)
+ continue; /* interrupted by signal, just retry */
+
+ close(f);
+ return false;
+ }
+
+ p += res;
+ len -= res;
+ }
+
+ close(f);
+ return true;
+}
+#endif
+
+/*
+ * pg_strong_random
+ *
+ * Generate requested number of random bytes. The returned bytes are
+ * cryptographically secure, suitable for use e.g. in authentication.
+ *
+ * We rely on system facilities for actually generating the numbers.
+ * We support a number of sources:
+ *
+ * 1. OpenSSL's RAND_bytes()
+ * 2. Windows' CryptGenRandom() function
+ * 3. /dev/urandom
+ *
+ * The configure script will choose which one to use, and set
+ * a USE_*_RANDOM flag accordingly.
+ *
+ * Returns true on success, and false if none of the sources
+ * were available. NB: It is important to check the return value!
+ * Proceeding with key generation when no random data was available
+ * would lead to predictable keys and security issues.
+ */
+bool
+pg_strong_random(void *buf, size_t len)
+{
+ /*
+ * When built with OpenSSL, use OpenSSL's RAND_bytes function.
+ */
+#if defined(USE_OPENSSL_RANDOM)
+ if (RAND_bytes(buf, len) == 1)
+ return true;
+ return false;
+
+ /*
+ * Windows has CryptoAPI for strong cryptographic numbers.
+ */
+#elif defined(USE_WIN32_RANDOM)
+ if (hProvider == 0)
+ {
+ if (!CryptAcquireContext(&hProvider,
+ NULL,
+ MS_DEF_PROV,
+ PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT | CRYPT_SILENT))
+ {
+ /*
+ * On failure, set back to 0 in case the value was for some reason
+ * modified.
+ */
+ hProvider = 0;
+ }
+ }
+ /* Re-check in case we just retrieved the provider */
+ if (hProvider != 0)
+ {
+ if (CryptGenRandom(hProvider, len, buf))
+ return true;
+ }
+ return false;
+
+ /*
+ * Read /dev/urandom ourselves.
+ */
+#elif defined(USE_DEV_URANDOM)
+ if (random_from_file("/dev/urandom", buf, len))
+ return true;
+ return false;
+
+#else
+ /* The autoconf script should not have allowed this */
+#error no source of random numbers configured
+#endif
+}
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index de764dd4d4..db566f9c88 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -91,8 +91,8 @@ sub mkvcbuild
chklocale.c crypt.c fls.c fseeko.c getrusage.c inet_aton.c random.c
srandom.c getaddrinfo.c gettimeofday.c inet_net_ntop.c kill.c open.c
erand48.c snprintf.c strlcat.c strlcpy.c dirmod.c noblock.c path.c
- pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c pqsignal.c
- mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
+ pg_strong_random.c pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c
+ pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c system.c
sprompt.c tar.c thread.c getopt.c getopt_long.c dirent.c
win32env.c win32error.c win32security.c win32setlocale.c);
@@ -425,7 +425,6 @@ sub mkvcbuild
'sha1.c', 'sha2.c',
'internal.c', 'internal-sha2.c',
'blf.c', 'rijndael.c',
- 'fortuna.c', 'random.c',
'pgp-mpi-internal.c', 'imath.c');
}
$pgcrypto->AddReference($postgres);
--
cgit v1.2.3
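With pgcrypto's bundled Fortuna generator (fortuna.c and random.c) removed above, pgcrypto's random functions are now backed by the same platform sources that pg_strong_random() draws on. A minimal way to exercise that plumbing from SQL, assuming the pgcrypto extension is available and the server was built with a strong random source (i.e. without --disable-strong-random):

-- Draw bytes from the platform source (OpenSSL, the Windows crypto API,
-- or /dev/urandom) rather than the removed in-tree Fortuna PRNG.
CREATE EXTENSION IF NOT EXISTS pgcrypto;

SELECT gen_random_bytes(16);   -- 16 cryptographically strong bytes
SELECT gen_random_uuid();      -- a version-4 UUID built on the same source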
From daac8e30eb7874722f277ae3461abe46a39e56ed Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Mon, 5 Dec 2016 20:44:21 +0900
Subject: Fix typo in docs.
Reported-by: Darko Prelec
---
doc/src/sgml/parallel.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index d0b438e889..38a040ef75 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -240,7 +240,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
copy of the output result set, so the query would not run any faster
than normal but would produce incorrect results. Instead, the parallel
portion of the plan must be what is known internally to the query
- optimizer as a partial plan>; that is, it must constructed
+ optimizer as a partial plan>; that is, it must be constructed
so that each process which executes the plan will generate only a
subset of the output rows in such a way that each required output row
is guaranteed to be generated by exactly one of the cooperating processes.
--
cgit v1.2.3
From 7dd8eb39bd2b9e06eeef038f80ae327efb4a7d55 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 5 Dec 2016 14:49:00 +0200
Subject: Fix creation of stand-alone INSTALL.html file.
I missed the notice at the top of the file, that plain xref must not be
used in installation.sgml.
Per buildfarm member guaibasaurus.
---
doc/src/sgml/installation.sgml | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 98594a487e..4431ed75a9 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -1103,7 +1103,9 @@ su - postgres
Allow the build to succeed even if PostgreSQL>
has no support for strong random numbers on the platform.
A source of random numbers is needed for some authentication
- protocols, as well as some routines in <xref linkend="pgcrypto">
+ protocols, as well as some routines in the
+ <![%standalone-include[<application>pgcrypto</>]]>
+ <![%standalone-ignore[<xref linkend="pgcrypto">]]>
module. --disable-strong-random disables functionality that
requires cryptographically strong random numbers, and substitutes
a weak pseudo-random-number-generator for the generation of
--
cgit v1.2.3
From 2b959d4957ff47c77b2518dcddbf3aa126a1593c Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 5 Dec 2016 10:53:21 -0500
Subject: Reduce the default for max_worker_processes back to 8.
Commit b460f5d6693103076dc554aa7cbb96e1e53074f9 -- at my suggestion --
increased the default value of max_worker_processes from 8 to 16, on
the theory that this would be harmless and convenient for users.
Unfortunately, this caused some buildfarm machines with low connection
limits to start failing, so apparently it's not harmless after all.
---
doc/src/sgml/config.sgml | 2 +-
src/backend/utils/init/globals.c | 2 +-
src/backend/utils/misc/guc.c | 2 +-
src/backend/utils/misc/postgresql.conf.sample | 2 +-
src/bin/pg_resetxlog/pg_resetxlog.c | 4 ++--
5 files changed, 6 insertions(+), 6 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index fdf8b3e9c7..b917f9578a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1982,7 +1982,7 @@ include_dir 'conf.d'
Sets the maximum number of background processes that the system
can support. This parameter can only be set at server start. The
- default is 16.
+ default is 8.
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 6ab03cea17..630c673cad 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -121,7 +121,7 @@ int replacement_sort_tuples = 150000;
*/
int NBuffers = 1000;
int MaxConnections = 90;
-int max_worker_processes = 16;
+int max_worker_processes = 8;
int max_parallel_workers = 8;
int MaxBackends = 0;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4e49d5b79c..a02511754e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2477,7 +2477,7 @@ static struct config_int ConfigureNamesInt[] =
NULL,
},
&max_worker_processes,
- 16, 0, MAX_BACKENDS,
+ 8, 0, MAX_BACKENDS,
check_max_worker_processes, NULL, NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 1f490c7de4..7f9acfda06 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -161,7 +161,7 @@
# - Asynchronous Behavior -
#effective_io_concurrency = 1 # 1-1000; 0 disables prefetching
-#max_worker_processes = 16 # (change requires restart)
+#max_worker_processes = 8 # (change requires restart)
#max_parallel_workers_per_gather = 2 # taken from max_worker_processes
#max_parallel_workers = 8 # total maximum number of worker_processes
#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 1beee7f8eb..2b76f64079 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -584,7 +584,7 @@ GuessControlValues(void)
ControlFile.wal_log_hints = false;
ControlFile.track_commit_timestamp = false;
ControlFile.MaxConnections = 100;
- ControlFile.max_worker_processes = 16;
+ ControlFile.max_worker_processes = 8;
ControlFile.max_prepared_xacts = 0;
ControlFile.max_locks_per_xact = 64;
@@ -800,7 +800,7 @@ RewriteControlFile(void)
ControlFile.wal_log_hints = false;
ControlFile.track_commit_timestamp = false;
ControlFile.MaxConnections = 100;
- ControlFile.max_worker_processes = 16;
+ ControlFile.max_worker_processes = 8;
ControlFile.max_prepared_xacts = 0;
ControlFile.max_locks_per_xact = 64;
--
cgit v1.2.3
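The restored default is easy to verify, and sites that want the briefly raised value can still opt back into it explicitly; a small sketch (the parameter only takes effect at server start):

SHOW max_worker_processes;             -- reports 8 by default again

ALTER SYSTEM SET max_worker_processes = 16;
-- restart the server, then confirm:
SHOW max_worker_processes;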
From 0e50af245397c9bf3e7b02c0958be599de838fac Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 5 Dec 2016 11:03:17 -0500
Subject: Assorted documentation improvements for max_parallel_workers.
Commit b460f5d6693103076dc554aa7cbb96e1e53074f9 overlooked a few bits
of documentation that seem like they should mention the new setting.
---
doc/src/sgml/config.sgml | 10 ++++++++++
doc/src/sgml/parallel.sgml | 17 +++++++++++++----
2 files changed, 23 insertions(+), 4 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b917f9578a..0fc4e57d90 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1990,6 +1990,12 @@ include_dir 'conf.d'
same or higher value than on the master server. Otherwise, queries
will not be allowed in the standby server.
+
+
+ When changing this value, consider also adjusting
+ <xref linkend="guc-max-parallel-workers"> and
+ <xref linkend="guc-max-parallel-workers-per-gather">.
+
@@ -2047,6 +2053,10 @@ include_dir 'conf.d'
parallel queries. The default value is 8. When increasing or
decreasing this value, consider also adjusting
<xref linkend="guc-max-parallel-workers-per-gather">.
+ Also, note that a setting for this value which is higher than
+ <xref linkend="guc-max-worker-processes"> will have no effect,
+ since parallel workers are taken from the pool of worker processes
+ established by that setting.
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index 38a040ef75..f39c21a455 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -61,14 +61,15 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
session will request a number of background
worker processes equal to the number
of workers chosen by the planner. The total number of background
- workers that can exist at any one time is limited by
- <xref linkend="guc-max-worker-processes">, so it is possible for a
+ workers that can exist at any one time is limited by both
+ <xref linkend="guc-max-worker-processes"> and
+ <xref linkend="guc-max-parallel-workers">, so it is possible for a
parallel query to run with fewer workers than planned, or even with
no workers at all. The optimal plan may depend on the number of workers
that are available, so this can result in poor query performance. If this
occurrence is frequent, considering increasing
- max_worker_processes> so that more workers can be run
- simultaneously or alternatively reducing
+ max_worker_processes> and max_parallel_workers>
+ so that more workers can be run simultaneously or alternatively reducing
<xref linkend="guc-max-parallel-workers-per-gather"> so that the planner
requests fewer workers.
@@ -203,6 +204,14 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
+
+
+ No background workers can be obtained because of the limitation that
+ the total number of background workers launched for purposes of
+ parallel query cannot exceed <xref linkend="guc-max-parallel-workers">.
+
+
+
The client sends an Execute message with a non-zero fetch count.
--
cgit v1.2.3
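The pool arithmetic these paragraphs describe can be observed with EXPLAIN. A sketch under assumptions (a throwaway table named parallel_demo, and a query that qualifies for parallelism):

-- A table large enough for the planner to consider a parallel scan.
CREATE TABLE parallel_demo AS
SELECT g AS x FROM generate_series(1, 10000000) AS g;

SET max_parallel_workers_per_gather = 4;

-- "Workers Planned" comes from the planner; "Workers Launched" can be
-- smaller when concurrent queries have drained the shared pool bounded
-- by max_worker_processes and max_parallel_workers.
EXPLAIN (ANALYZE) SELECT count(*) FROM parallel_demo;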
From 093129c9d9fc231649b3cc27b8086443ccbbbc22 Mon Sep 17 00:00:00 2001
From: Stephen Frost
Date: Mon, 5 Dec 2016 15:50:55 -0500
Subject: Add support for restrictive RLS policies
We have had support for restrictive RLS policies since 9.5, but they
were only available through extensions which use the appropriate hooks.
This adds support into the grammar, catalog, psql and pg_dump for
restrictive RLS policies, thus reducing the cases where an extension is
necessary.
In passing, also move away from using "AND"d and "OR"d in comments.
As pointed out by Alvaro, it's not really appropriate to attempt
to make verbs out of "AND" and "OR", so reword those comments which
attempted to.
Reviewed By: Jeevan Chalke, Dean Rasheed
Discussion: https://postgr.es/m/20160901063404.GY4028@tamriel.snowman.net
---
doc/src/sgml/catalogs.sgml | 13 ++
doc/src/sgml/ddl.sgml | 58 +++++-
doc/src/sgml/ref/alter_policy.sgml | 7 +-
doc/src/sgml/ref/create_policy.sgml | 38 ++++
src/backend/catalog/system_views.sql | 6 +
src/backend/commands/policy.c | 9 +
src/backend/nodes/copyfuncs.c | 1 +
src/backend/nodes/equalfuncs.c | 1 +
src/backend/parser/gram.y | 43 +++--
src/backend/rewrite/rowsecurity.c | 54 +++---
src/bin/pg_dump/pg_dump.c | 69 +++++---
src/bin/pg_dump/pg_dump.h | 3 +-
src/bin/pg_dump/t/002_pg_dump.pl | 33 +++-
src/bin/psql/describe.c | 100 ++++++++---
src/bin/psql/tab-complete.c | 29 ++-
src/include/catalog/pg_policy.h | 16 +-
src/include/nodes/parsenodes.h | 1 +
src/include/rewrite/rowsecurity.h | 1 +
src/test/regress/expected/rowsecurity.out | 284 ++++++++++++++++++++++++------
src/test/regress/expected/rules.out | 4 +
src/test/regress/sql/rowsecurity.sql | 45 ++++-
21 files changed, 665 insertions(+), 150 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 561e228558..c4246dcd86 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -4747,6 +4747,13 @@
or *> for all
+
+ polpermissive
+ boolean
+
+ Is the policy permissive or restrictive?
+
+
polrolesoid[]
@@ -8437,6 +8444,12 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
pg_policy.polname Name of policy
+
+ polpermissive
+ text
+
+ Is the policy permissive or restrictive?
+ roles name[]
diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 157512c7ab..7e1bc0e534 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -1599,9 +1599,11 @@ REVOKE ALL ON accounts FROM PUBLIC;
When multiple policies apply to a given query, they are combined using
- OR, so that a row is accessible if any policy allows
- it. This is similar to the rule that a given role has the privileges
- of all roles that they are a member of.
+ either OR (for permissive policies, which are the
+ default) or using AND (for restrictive policies).
+ This is similar to the rule that a given role has the privileges
+ of all roles that they are a member of. Permissive vs. restrictive
+ policies are discussed further below.
@@ -1761,6 +1763,56 @@ ERROR: permission denied for relation passwd
-- Alice can change her own password; RLS silently prevents updating other rows
postgres=> update passwd set pwhash = 'abc';
UPDATE 1
+
+
+
+ All of the policies constructed thus far have been permissive policies,
+ meaning that when multiple policies are applied they are combined using
+ the "OR" boolean operator. While permissive policies can be constructed
+ to only allow access to rows in the intended cases, it can be simpler to
+ combine permissive policies with restrictive policies (which the records
+ must pass and which are combined using the "AND" boolean operator).
+ Building on the example above, we add a restrictive policy to require
+ the administrator to be connected over a local unix socket to access the
+ records of the passwd table:
+
+
+
+CREATE POLICY admin_local_only ON passwd AS RESTRICTIVE TO admin
+ USING (pg_catalog.inet_client_addr() IS NULL);
+
+
+
+ We can then see that an administrator connecting over a network will not
+ see any records, due to the restrictive policy:
+
+
+
+=> SELECT current_user;
+ current_user
+--------------
+ admin
+(1 row)
+
+=> select inet_client_addr();
+ inet_client_addr
+------------------
+ 127.0.0.1
+(1 row)
+
+=> SELECT current_user;
+ current_user
+--------------
+ admin
+(1 row)
+
+=> TABLE passwd;
+ user_name | pwhash | uid | gid | real_name | home_phone | extra_info | home_dir | shell
+-----------+--------+-----+-----+-----------+------------+------------+----------+-------
+(0 rows)
+
+=> UPDATE passwd set pwhash = NULL;
+UPDATE 0
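For contrast with the network session shown in the hunk above, a sketch of what the same admin role would see over a local Unix-domain socket, where inet_client_addr() returns NULL and the restrictive policy therefore passes (the exact rows returned depend on the permissive policies in effect):

=> SELECT inet_client_addr();
 inet_client_addr
------------------

(1 row)

=> TABLE passwd;

With admin_local_only satisfied, the query once again returns whatever rows the permissive policies grant to admin.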
diff --git a/doc/src/sgml/ref/alter_policy.sgml b/doc/src/sgml/ref/alter_policy.sgml
index a9b1541322..df347d180e 100644
--- a/doc/src/sgml/ref/alter_policy.sgml
+++ b/doc/src/sgml/ref/alter_policy.sgml
@@ -35,7 +35,12 @@ ALTER POLICY name ON ALTER POLICY changes the definition of an existing
- row-level security policy.
+ row-level security policy. Note that ALTER POLICY
+ only allows the set of roles to which the policy applies and the
+ USING and WITH CHECK expressions to
+ be modified. To change other properties of a policy, such as the command
+ to which it applies or whether it is permissive or restrictive, the policy
+ must be dropped and recreated.
diff --git a/doc/src/sgml/ref/create_policy.sgml b/doc/src/sgml/ref/create_policy.sgml
index 89d27879b1..f0486effaf 100644
--- a/doc/src/sgml/ref/create_policy.sgml
+++ b/doc/src/sgml/ref/create_policy.sgml
@@ -22,6 +22,7 @@ PostgreSQL documentation
CREATE POLICY name ON table_name
+ [ AS { PERMISSIVE | RESTRICTIVE } ]
[ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ]
[ TO { role_name | PUBLIC | CURRENT_USER | SESSION_USER } [, ...] ]
[ USING ( using_expression ) ]
@@ -119,6 +120,33 @@ CREATE POLICY name ON
+
+ PERMISSIVE
+
+
+ Specify that the policy is to be created as a permissive policy.
+ All permissive policies which are applicable to a given query will
+ be combined together using the boolean "OR" operator. By creating
+ permissive policies, administrators can add to the set of records
+ which can be accessed. Policies are PERMISSIVE by default.
+
+
+
+
+
+ RESTRICTIVE
+
+
+ Specify that the policy is to be created as a restrictive policy.
+ All restrictive policies which are applicable to a given query will
+ be combined together using the boolean "AND" operator. By creating
+ restrictive policies, administrators can reduce the set of records
+ which can be accessed as all restrictive policies must be passed for
+ each record.
+
+
+
+
command
@@ -390,6 +418,16 @@ CREATE POLICY name ON
+
+ Note that there needs to be at least one permissive policy to grant
+ access to records before restrictive policies can be usefully used to
+ reduce that access. If only restrictive policies exist, then no records
+ will be accessible. When a mix of permissive and restrictive policies
+ are present, a record is only accessible if at least one of the
+ permissive policies passes, in addition to all the restrictive
+ policies.
+
+
Generally, the system will enforce filter conditions imposed using
security policies prior to qualifications that appear in user queries,
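The note in this hunk, that restrictive policies on their own grant nothing, is worth seeing concretely. A minimal sketch, assuming the SELECTs run as a role that is neither the table's owner nor BYPASSRLS (owners bypass RLS unless FORCE ROW LEVEL SECURITY is set):

CREATE TABLE t (a int);
INSERT INTO t VALUES (1), (2);
ALTER TABLE t ENABLE ROW LEVEL SECURITY;

-- Only a restrictive policy: no rows are visible, even though both
-- rows satisfy its USING clause.
CREATE POLICY r ON t AS RESTRICTIVE USING (a > 0);
SELECT * FROM t;          -- 0 rows

-- Add a permissive policy and rows appear, still filtered by r.
CREATE POLICY p ON t AS PERMISSIVE USING (true);
SELECT * FROM t;          -- 2 rows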
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index e011af122c..df59d1819c 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -75,6 +75,12 @@ CREATE VIEW pg_policies AS
N.nspname AS schemaname,
C.relname AS tablename,
pol.polname AS policyname,
+ CASE
+ WHEN pol.polpermissive THEN
+ 'PERMISSIVE'
+ ELSE
+ 'RESTRICTIVE'
+ END AS permissive,
CASE
WHEN pol.polroles = '{0}' THEN
string_to_array('public', '')
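The new permissive column also surfaces through the pg_policies view defined above, which allows a one-query audit of which policies restrict rather than grant access; a sketch:

SELECT schemaname, tablename, policyname, permissive, cmd
FROM pg_policies
WHERE permissive = 'RESTRICTIVE'
ORDER BY schemaname, tablename, policyname;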
diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c
index d694cf80be..70e22c1000 100644
--- a/src/backend/commands/policy.c
+++ b/src/backend/commands/policy.c
@@ -235,6 +235,7 @@ RelationBuildRowSecurity(Relation relation)
{
Datum value_datum;
char cmd_value;
+ bool permissive_value;
Datum roles_datum;
char *qual_value;
Expr *qual_expr;
@@ -257,6 +258,12 @@ RelationBuildRowSecurity(Relation relation)
Assert(!isnull);
cmd_value = DatumGetChar(value_datum);
+ /* Get policy permissive or restrictive */
+ value_datum = heap_getattr(tuple, Anum_pg_policy_polpermissive,
+ RelationGetDescr(catalog), &isnull);
+ Assert(!isnull);
+ permissive_value = DatumGetBool(value_datum);
+
/* Get policy name */
value_datum = heap_getattr(tuple, Anum_pg_policy_polname,
RelationGetDescr(catalog), &isnull);
@@ -298,6 +305,7 @@ RelationBuildRowSecurity(Relation relation)
policy = palloc0(sizeof(RowSecurityPolicy));
policy->policy_name = pstrdup(policy_name_value);
policy->polcmd = cmd_value;
+ policy->permissive = permissive_value;
policy->roles = DatumGetArrayTypePCopy(roles_datum);
policy->qual = copyObject(qual_expr);
policy->with_check_qual = copyObject(with_check_qual);
@@ -796,6 +804,7 @@ CreatePolicy(CreatePolicyStmt *stmt)
values[Anum_pg_policy_polname - 1] = DirectFunctionCall1(namein,
CStringGetDatum(stmt->policy_name));
values[Anum_pg_policy_polcmd - 1] = CharGetDatum(polcmd);
+ values[Anum_pg_policy_polpermissive - 1] = BoolGetDatum(stmt->permissive);
values[Anum_pg_policy_polroles - 1] = PointerGetDatum(role_ids);
/* Add qual if present. */
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 04e49b7795..dd66adb0b2 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -4166,6 +4166,7 @@ _copyCreatePolicyStmt(const CreatePolicyStmt *from)
COPY_STRING_FIELD(policy_name);
COPY_NODE_FIELD(table);
COPY_STRING_FIELD(cmd_name);
+ COPY_SCALAR_FIELD(permissive);
COPY_NODE_FIELD(roles);
COPY_NODE_FIELD(qual);
COPY_NODE_FIELD(with_check);
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 2eaf41c37f..cad3aebecd 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2125,6 +2125,7 @@ _equalCreatePolicyStmt(const CreatePolicyStmt *a, const CreatePolicyStmt *b)
COMPARE_STRING_FIELD(policy_name);
COMPARE_NODE_FIELD(table);
COMPARE_STRING_FIELD(cmd_name);
+ COMPARE_SCALAR_FIELD(permissive);
COMPARE_NODE_FIELD(roles);
COMPARE_NODE_FIELD(qual);
COMPARE_NODE_FIELD(with_check);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index d6274b49e7..414348b95b 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -332,6 +332,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type all_Op MathOp
%type row_security_cmd RowSecurityDefaultForCmd
+%type RowSecurityDefaultPermissive
%type RowSecurityOptionalWithCheck RowSecurityOptionalExpr
%type RowSecurityDefaultToRole RowSecurityOptionalToRole
@@ -4628,26 +4629,30 @@ AlterUserMappingStmt: ALTER USER MAPPING FOR auth_ident SERVER name alter_generi
/*****************************************************************************
*
* QUERIES:
- * CREATE POLICY name ON table [FOR cmd] [TO role, ...]
- * [USING (qual)] [WITH CHECK (with_check)]
+ * CREATE POLICY name ON table
+ * [AS { PERMISSIVE | RESTRICTIVE } ]
+ * [FOR { SELECT | INSERT | UPDATE | DELETE } ]
+ * [TO role, ...]
+ * [USING (qual)] [WITH CHECK (with check qual)]
* ALTER POLICY name ON table [TO role, ...]
- * [USING (qual)] [WITH CHECK (with_check)]
+ * [USING (qual)] [WITH CHECK (with check qual)]
* DROP POLICY name ON table
*
*****************************************************************************/
CreatePolicyStmt:
- CREATE POLICY name ON qualified_name RowSecurityDefaultForCmd
- RowSecurityDefaultToRole RowSecurityOptionalExpr
- RowSecurityOptionalWithCheck
+ CREATE POLICY name ON qualified_name RowSecurityDefaultPermissive
+ RowSecurityDefaultForCmd RowSecurityDefaultToRole
+ RowSecurityOptionalExpr RowSecurityOptionalWithCheck
{
CreatePolicyStmt *n = makeNode(CreatePolicyStmt);
n->policy_name = $3;
n->table = $5;
- n->cmd_name = $6;
- n->roles = $7;
- n->qual = $8;
- n->with_check = $9;
+ n->permissive = $6;
+ n->cmd_name = $7;
+ n->roles = $8;
+ n->qual = $9;
+ n->with_check = $10;
$$ = (Node *) n;
}
;
@@ -4711,6 +4716,24 @@ RowSecurityOptionalToRole:
| /* EMPTY */ { $$ = NULL; }
;
+RowSecurityDefaultPermissive:
+ AS IDENT
+ {
+ if (strcmp($2, "permissive") == 0)
+ $$ = true;
+ else if (strcmp($2, "restrictive") == 0)
+ $$ = false;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized row security option \"%s\"", $2),
+ errhint("Only PERMISSIVE or RESTRICTIVE policies are supported currently."),
+ parser_errposition(@2)));
+
+ }
+ | /* EMPTY */ { $$ = true; }
+ ;
+
RowSecurityDefaultForCmd:
FOR row_security_cmd { $$ = $2; }
| /* EMPTY */ { $$ = "all"; }
diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c
index e02911656a..b7edefc7dd 100644
--- a/src/backend/rewrite/rowsecurity.c
+++ b/src/backend/rewrite/rowsecurity.c
@@ -86,10 +86,10 @@ static bool check_role_for_policy(ArrayType *policy_roles, Oid user_id);
* hooks to allow extensions to add their own security policies
*
* row_security_policy_hook_permissive can be used to add policies which
- * are included in the "OR"d set of policies.
+ * are combined with the other permissive policies, using OR.
*
* row_security_policy_hook_restrictive can be used to add policies which
- * are enforced, regardless of other policies (they are "AND"d).
+ * are enforced, regardless of other policies (they are combined using AND).
*/
row_security_policy_hook_type row_security_policy_hook_permissive = NULL;
row_security_policy_hook_type row_security_policy_hook_restrictive = NULL;
@@ -212,8 +212,8 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index,
/*
* For SELECT, UPDATE and DELETE, add security quals to enforce the USING
* policies. These security quals control access to existing table rows.
- * Restrictive policies are "AND"d together, and permissive policies are
- * "OR"d together.
+ * Restrictive policies are combined together using AND, and permissive
+ * policies are combined together using OR.
*/
get_policies_for_relation(rel, commandType, user_id, &permissive_policies,
@@ -433,9 +433,20 @@ get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id,
* the specified role.
*/
if (cmd_matches && check_role_for_policy(policy->roles, user_id))
- *permissive_policies = lappend(*permissive_policies, policy);
+ {
+ if (policy->permissive)
+ *permissive_policies = lappend(*permissive_policies, policy);
+ else
+ *restrictive_policies = lappend(*restrictive_policies, policy);
+ }
}
+ /*
+ * We sort restrictive policies by name so that any WCOs they generate are
+ * checked in a well-defined order.
+ */
+ *restrictive_policies = sort_policies_by_name(*restrictive_policies);
+
/*
* Then add any permissive or restrictive policies defined by extensions.
* These are simply appended to the lists of internal policies, if they
@@ -447,8 +458,10 @@ get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id,
(*row_security_policy_hook_restrictive) (cmd, relation);
/*
- * We sort restrictive policies by name so that any WCOs they generate
- * are checked in a well-defined order.
+ * As with built-in restrictive policies, we sort any hook-provided
+ * restrictive policies by name also. Note that we also intentionally
+ * always check all built-in restrictive policies, in name order,
+ * before checking restrictive policies added by hooks, in name order.
*/
hook_policies = sort_policies_by_name(hook_policies);
@@ -481,8 +494,8 @@ get_policies_for_relation(Relation relation, CmdType cmd, Oid user_id,
*
* This is only used for restrictive policies, ensuring that any
* WithCheckOptions they generate are applied in a well-defined order.
- * This is not necessary for permissive policies, since they are all "OR"d
- * together into a single WithCheckOption check.
+ * This is not necessary for permissive policies, since they are all combined
+ * together using OR into a single WithCheckOption check.
*/
static List *
sort_policies_by_name(List *policies)
@@ -580,8 +593,8 @@ add_security_quals(int rt_index,
/*
* We now know that permissive policies exist, so we can now add
* security quals based on the USING clauses from the restrictive
- * policies. Since these need to be "AND"d together, we can just add
- * them one at a time.
+ * policies. Since these need to be combined together using AND, we
+ * can just add them one at a time.
*/
foreach(item, restrictive_policies)
{
@@ -599,8 +612,8 @@ add_security_quals(int rt_index,
}
/*
- * Then add a single security qual "OR"ing together the USING clauses
- * from all the permissive policies.
+ * Then add a single security qual combining together the USING
+ * clauses from all the permissive policies using OR.
*/
if (list_length(permissive_quals) == 1)
rowsec_expr = (Expr *) linitial(permissive_quals);
@@ -681,10 +694,11 @@ add_with_check_options(Relation rel,
if (permissive_quals != NIL)
{
/*
- * Add a single WithCheckOption for all the permissive policy clauses
- * "OR"d together. This check has no policy name, since if the check
- * fails it means that no policy granted permission to perform the
- * update, rather than any particular policy being violated.
+ * Add a single WithCheckOption for all the permissive policy clauses,
+ * combining them together using OR. This check has no policy name,
+ * since if the check fails it means that no policy granted permission
+ * to perform the update, rather than any particular policy being
+ * violated.
*/
WithCheckOption *wco;
@@ -705,9 +719,9 @@ add_with_check_options(Relation rel,
/*
* Now add WithCheckOptions for each of the restrictive policy clauses
- * (which will be "AND"d together). We use a separate WithCheckOption
- * for each restrictive policy to allow the policy name to be included
- * in error reports if the policy is violated.
+ * (which will be combined together using AND). We use a separate
+ * WithCheckOption for each restrictive policy to allow the policy
+ * name to be included in error reports if the policy is violated.
*/
foreach(item, restrictive_policies)
{
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 2ff60b9879..42873bb32a 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3037,6 +3037,7 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
int i_tableoid;
int i_polname;
int i_polcmd;
+ int i_polpermissive;
int i_polroles;
int i_polqual;
int i_polwithcheck;
@@ -3082,7 +3083,8 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
polinfo->dobj.name = pg_strdup(tbinfo->dobj.name);
polinfo->poltable = tbinfo;
polinfo->polname = NULL;
- polinfo->polcmd = NULL;
+ polinfo->polcmd = '\0';
+ polinfo->polpermissive = 0;
polinfo->polroles = NULL;
polinfo->polqual = NULL;
polinfo->polwithcheck = NULL;
@@ -3101,15 +3103,26 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
resetPQExpBuffer(query);
/* Get the policies for the table. */
- appendPQExpBuffer(query,
- "SELECT oid, tableoid, pol.polname, pol.polcmd, "
- "CASE WHEN pol.polroles = '{0}' THEN 'PUBLIC' ELSE "
- " pg_catalog.array_to_string(ARRAY(SELECT pg_catalog.quote_ident(rolname) from pg_catalog.pg_roles WHERE oid = ANY(pol.polroles)), ', ') END AS polroles, "
- "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid) AS polqual, "
- "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid) AS polwithcheck "
- "FROM pg_catalog.pg_policy pol "
- "WHERE polrelid = '%u'",
- tbinfo->dobj.catId.oid);
+ if (fout->remoteVersion >= 100000)
+ appendPQExpBuffer(query,
+ "SELECT oid, tableoid, pol.polname, pol.polcmd, pol.polpermissive, "
+ "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE "
+ " pg_catalog.array_to_string(ARRAY(SELECT pg_catalog.quote_ident(rolname) from pg_catalog.pg_roles WHERE oid = ANY(pol.polroles)), ', ') END AS polroles, "
+ "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid) AS polqual, "
+ "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid) AS polwithcheck "
+ "FROM pg_catalog.pg_policy pol "
+ "WHERE polrelid = '%u'",
+ tbinfo->dobj.catId.oid);
+ else
+ appendPQExpBuffer(query,
+ "SELECT oid, tableoid, pol.polname, pol.polcmd, 't' as polpermissive, "
+ "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE "
+ " pg_catalog.array_to_string(ARRAY(SELECT pg_catalog.quote_ident(rolname) from pg_catalog.pg_roles WHERE oid = ANY(pol.polroles)), ', ') END AS polroles, "
+ "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid) AS polqual, "
+ "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid) AS polwithcheck "
+ "FROM pg_catalog.pg_policy pol "
+ "WHERE polrelid = '%u'",
+ tbinfo->dobj.catId.oid);
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
ntups = PQntuples(res);
@@ -3129,6 +3142,7 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
i_tableoid = PQfnumber(res, "tableoid");
i_polname = PQfnumber(res, "polname");
i_polcmd = PQfnumber(res, "polcmd");
+ i_polpermissive = PQfnumber(res, "polpermissive");
i_polroles = PQfnumber(res, "polroles");
i_polqual = PQfnumber(res, "polqual");
i_polwithcheck = PQfnumber(res, "polwithcheck");
@@ -3147,8 +3161,13 @@ getPolicies(Archive *fout, TableInfo tblinfo[], int numTables)
polinfo[j].polname = pg_strdup(PQgetvalue(res, j, i_polname));
polinfo[j].dobj.name = pg_strdup(polinfo[j].polname);
- polinfo[j].polcmd = pg_strdup(PQgetvalue(res, j, i_polcmd));
- polinfo[j].polroles = pg_strdup(PQgetvalue(res, j, i_polroles));
+ polinfo[j].polcmd = *(PQgetvalue(res, j, i_polcmd));
+ polinfo[j].polpermissive = *(PQgetvalue(res, j, i_polpermissive)) == 't';
+
+ if (PQgetisnull(res, j, i_polroles))
+ polinfo[j].polroles = NULL;
+ else
+ polinfo[j].polroles = pg_strdup(PQgetvalue(res, j, i_polroles));
if (PQgetisnull(res, j, i_polqual))
polinfo[j].polqual = NULL;
@@ -3210,19 +3229,19 @@ dumpPolicy(Archive *fout, PolicyInfo *polinfo)
return;
}
- if (strcmp(polinfo->polcmd, "*") == 0)
- cmd = "ALL";
- else if (strcmp(polinfo->polcmd, "r") == 0)
- cmd = "SELECT";
- else if (strcmp(polinfo->polcmd, "a") == 0)
- cmd = "INSERT";
- else if (strcmp(polinfo->polcmd, "w") == 0)
- cmd = "UPDATE";
- else if (strcmp(polinfo->polcmd, "d") == 0)
- cmd = "DELETE";
+ if (polinfo->polcmd == '*')
+ cmd = "";
+ else if (polinfo->polcmd == 'r')
+ cmd = " FOR SELECT";
+ else if (polinfo->polcmd == 'a')
+ cmd = " FOR INSERT";
+ else if (polinfo->polcmd == 'w')
+ cmd = " FOR UPDATE";
+ else if (polinfo->polcmd == 'd')
+ cmd = " FOR DELETE";
else
{
- write_msg(NULL, "unexpected policy command type: \"%s\"\n",
+ write_msg(NULL, "unexpected policy command type: %c\n",
polinfo->polcmd);
exit_nicely(1);
}
@@ -3231,7 +3250,9 @@ dumpPolicy(Archive *fout, PolicyInfo *polinfo)
delqry = createPQExpBuffer();
appendPQExpBuffer(query, "CREATE POLICY %s", fmtId(polinfo->polname));
- appendPQExpBuffer(query, " ON %s FOR %s", fmtId(tbinfo->dobj.name), cmd);
+
+ appendPQExpBuffer(query, " ON %s%s%s", fmtId(tbinfo->dobj.name),
+ !polinfo->polpermissive ? " AS RESTRICTIVE" : "", cmd);
if (polinfo->polroles != NULL)
appendPQExpBuffer(query, " TO %s", polinfo->polroles);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index f3e5977178..7df9066cd7 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -547,7 +547,8 @@ typedef struct _policyInfo
DumpableObject dobj;
TableInfo *poltable;
char *polname; /* null indicates RLS is enabled on rel */
- char *polcmd;
+ char polcmd;
+ bool polpermissive;
char *polroles;
char *polqual;
char *polwithcheck;
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 7379487471..f8955228cf 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -2004,7 +2004,7 @@ my %tests = (
USING (true)
WITH CHECK (true);',
regexp => qr/^
- \QCREATE POLICY p1 ON test_table FOR ALL TO PUBLIC \E
+ \QCREATE POLICY p1 ON test_table \E
\QUSING (true) WITH CHECK (true);\E
/xm,
like => {
@@ -2166,7 +2166,36 @@ my %tests = (
pg_dumpall_globals_clean => 1,
role => 1,
section_pre_data => 1, }, },
-
+ 'CREATE POLICY p6 ON test_table AS RESTRICTIVE' => {
+ create_order => 27,
+ create_sql => 'CREATE POLICY p6 ON dump_test.test_table AS RESTRICTIVE
+ USING (false);',
+ regexp => qr/^
+ \QCREATE POLICY p6 ON test_table AS RESTRICTIVE \E
+ \QUSING (false);\E
+ /xm,
+ like => {
+ binary_upgrade => 1,
+ clean => 1,
+ clean_if_exists => 1,
+ createdb => 1,
+ defaults => 1,
+ exclude_test_table_data => 1,
+ no_privs => 1,
+ no_owner => 1,
+ only_dump_test_schema => 1,
+ only_dump_test_table => 1,
+ pg_dumpall_dbprivs => 1,
+ schema_only => 1,
+ section_post_data => 1,
+ test_schema_plus_blobs => 1, },
+ unlike => {
+ exclude_dump_test_schema => 1,
+ exclude_test_table => 1,
+ pg_dumpall_globals => 1,
+ pg_dumpall_globals_clean => 1,
+ role => 1,
+ section_pre_data => 1, }, },
'CREATE SCHEMA dump_test' => {
all_runs => 1,
catch_all => 'CREATE ... commands',
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 1632104598..931c6887f9 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -887,10 +887,44 @@ permissionsList(const char *pattern)
" ), E'\\n') AS \"%s\"",
gettext_noop("Column privileges"));
- if (pset.sversion >= 90500)
+ if (pset.sversion >= 90500 && pset.sversion < 100000)
+ appendPQExpBuffer(&buf,
+ ",\n pg_catalog.array_to_string(ARRAY(\n"
+ " SELECT polname\n"
+ " || CASE WHEN polcmd != '*' THEN\n"
+ " E' (' || polcmd || E'):'\n"
+ " ELSE E':' \n"
+ " END\n"
+ " || CASE WHEN polqual IS NOT NULL THEN\n"
+ " E'\\n (u): ' || pg_catalog.pg_get_expr(polqual, polrelid)\n"
+ " ELSE E''\n"
+ " END\n"
+ " || CASE WHEN polwithcheck IS NOT NULL THEN\n"
+ " E'\\n (c): ' || pg_catalog.pg_get_expr(polwithcheck, polrelid)\n"
+ " ELSE E''\n"
+ " END"
+ " || CASE WHEN polroles <> '{0}' THEN\n"
+ " E'\\n to: ' || pg_catalog.array_to_string(\n"
+ " ARRAY(\n"
+ " SELECT rolname\n"
+ " FROM pg_catalog.pg_roles\n"
+ " WHERE oid = ANY (polroles)\n"
+ " ORDER BY 1\n"
+ " ), E', ')\n"
+ " ELSE E''\n"
+ " END\n"
+ " FROM pg_catalog.pg_policy pol\n"
+ " WHERE polrelid = c.oid), E'\\n')\n"
+ " AS \"%s\"",
+ gettext_noop("Policies"));
+
+ if (pset.sversion >= 100000)
appendPQExpBuffer(&buf,
",\n pg_catalog.array_to_string(ARRAY(\n"
" SELECT polname\n"
+ " || CASE WHEN NOT polpermissive THEN\n"
+ " E' (RESTRICTIVE)'\n"
+ " ELSE '' END\n"
" || CASE WHEN polcmd != '*' THEN\n"
" E' (' || polcmd || E'):'\n"
" ELSE E':' \n"
@@ -2112,21 +2146,36 @@ describeOneTableDetails(const char *schemaname,
/* print any row-level policies */
if (pset.sversion >= 90500)
{
- printfPQExpBuffer(&buf,
- "SELECT pol.polname,\n"
- "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE array_to_string(array(select rolname from pg_roles where oid = any (pol.polroles) order by 1),',') END,\n"
- "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid),\n"
- "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid),\n"
- "CASE pol.polcmd \n"
- "WHEN 'r' THEN 'SELECT'\n"
- "WHEN 'a' THEN 'INSERT'\n"
- "WHEN 'w' THEN 'UPDATE'\n"
- "WHEN 'd' THEN 'DELETE'\n"
- "WHEN '*' THEN 'ALL'\n"
- "END AS cmd\n"
- "FROM pg_catalog.pg_policy pol\n"
- "WHERE pol.polrelid = '%s' ORDER BY 1;",
- oid);
+ if (pset.sversion >= 100000)
+ printfPQExpBuffer(&buf,
+ "SELECT pol.polname, pol.polpermissive,\n"
+ "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE array_to_string(array(select rolname from pg_roles where oid = any (pol.polroles) order by 1),',') END,\n"
+ "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid),\n"
+ "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid),\n"
+ "CASE pol.polcmd \n"
+ "WHEN 'r' THEN 'SELECT'\n"
+ "WHEN 'a' THEN 'INSERT'\n"
+ "WHEN 'w' THEN 'UPDATE'\n"
+ "WHEN 'd' THEN 'DELETE'\n"
+ "END AS cmd\n"
+ "FROM pg_catalog.pg_policy pol\n"
+ "WHERE pol.polrelid = '%s' ORDER BY 1;",
+ oid);
+ else
+ printfPQExpBuffer(&buf,
+ "SELECT pol.polname, 't' as polpermissive,\n"
+ "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE array_to_string(array(select rolname from pg_roles where oid = any (pol.polroles) order by 1),',') END,\n"
+ "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid),\n"
+ "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid),\n"
+ "CASE pol.polcmd \n"
+ "WHEN 'r' THEN 'SELECT'\n"
+ "WHEN 'a' THEN 'INSERT'\n"
+ "WHEN 'w' THEN 'UPDATE'\n"
+ "WHEN 'd' THEN 'DELETE'\n"
+ "END AS cmd\n"
+ "FROM pg_catalog.pg_policy pol\n"
+ "WHERE pol.polrelid = '%s' ORDER BY 1;",
+ oid);
result = PSQLexec(buf.data);
if (!result)
@@ -2160,23 +2209,26 @@ describeOneTableDetails(const char *schemaname,
printfPQExpBuffer(&buf, " POLICY \"%s\"",
PQgetvalue(result, i, 0));
- if (!PQgetisnull(result, i, 4))
+ if (*(PQgetvalue(result, i, 1)) == 'f')
+ appendPQExpBuffer(&buf, " AS RESTRICTIVE");
+
+ if (!PQgetisnull(result, i, 5))
appendPQExpBuffer(&buf, " FOR %s",
- PQgetvalue(result, i, 4));
+ PQgetvalue(result, i, 5));
- if (!PQgetisnull(result, i, 1))
+ if (!PQgetisnull(result, i, 2))
{
appendPQExpBuffer(&buf, "\n TO %s",
- PQgetvalue(result, i, 1));
+ PQgetvalue(result, i, 2));
}
- if (!PQgetisnull(result, i, 2))
+ if (!PQgetisnull(result, i, 3))
appendPQExpBuffer(&buf, "\n USING (%s)",
- PQgetvalue(result, i, 2));
+ PQgetvalue(result, i, 3));
- if (!PQgetisnull(result, i, 3))
+ if (!PQgetisnull(result, i, 4))
appendPQExpBuffer(&buf, "\n WITH CHECK (%s)",
- PQgetvalue(result, i, 3));
+ PQgetvalue(result, i, 4));
printTableAddFooter(&cont, buf.data);
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 6aa3f20e13..6b95052a67 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2162,9 +2162,15 @@ psql_completion(const char *text, int start, int end)
/* Complete "CREATE POLICY ON
" */
else if (Matches4("CREATE", "POLICY", MatchAny, "ON"))
COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables, NULL);
- /* Complete "CREATE POLICY ON
FOR|TO|USING|WITH CHECK" */
+ /* Complete "CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE */
+ else if (Matches6("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS"))
+ COMPLETE_WITH_LIST2("PERMISSIVE", "RESTRICTIVE");
+ /* CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE FOR|TO|USING|WITH CHECK */
+ else if (Matches7("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny))
+ COMPLETE_WITH_LIST4("FOR", "TO", "USING", "WITH CHECK");
/* CREATE POLICY ON
FOR ALL|SELECT|INSERT|UPDATE|DELETE */
else if (Matches6("CREATE", "POLICY", MatchAny, "ON", MatchAny, "FOR"))
COMPLETE_WITH_LIST5("ALL", "SELECT", "INSERT", "UPDATE", "DELETE");
@@ -2183,6 +2189,25 @@ psql_completion(const char *text, int start, int end)
/* Complete "CREATE POLICY ON
USING (" */
else if (Matches6("CREATE", "POLICY", MatchAny, "ON", MatchAny, "USING"))
COMPLETE_WITH_CONST("(");
+ /* CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE FOR ALL|SELECT|INSERT|UPDATE|DELETE */
+ else if (Matches8("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny, "FOR"))
+ COMPLETE_WITH_LIST5("ALL", "SELECT", "INSERT", "UPDATE", "DELETE");
+ /* Complete "CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE FOR INSERT TO|WITH CHECK" */
+ else if (Matches9("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny, "FOR", "INSERT"))
+ COMPLETE_WITH_LIST2("TO", "WITH CHECK (");
+ /* Complete "CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE FOR SELECT|DELETE TO|USING" */
+ else if (Matches9("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny, "FOR", "SELECT|DELETE"))
+ COMPLETE_WITH_LIST2("TO", "USING (");
+ /* CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE FOR ALL|UPDATE TO|USING|WITH CHECK */
+ else if (Matches9("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny, "FOR", "ALL|UPDATE"))
+ COMPLETE_WITH_LIST3("TO", "USING (", "WITH CHECK (");
+ /* Complete "CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE TO " */
+ else if (Matches8("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny, "TO"))
+ COMPLETE_WITH_QUERY(Query_for_list_of_grant_roles);
+ /* Complete "CREATE POLICY ON
AS PERMISSIVE|RESTRICTIVE USING (" */
+ else if (Matches8("CREATE", "POLICY", MatchAny, "ON", MatchAny, "AS", MatchAny, "USING"))
+ COMPLETE_WITH_CONST("(");
+
/* CREATE RULE */
/* Complete "CREATE RULE " with "AS ON" */
diff --git a/src/include/catalog/pg_policy.h b/src/include/catalog/pg_policy.h
index d73e9c2c1a..67b5fb5cd2 100644
--- a/src/include/catalog/pg_policy.h
+++ b/src/include/catalog/pg_policy.h
@@ -23,6 +23,7 @@ CATALOG(pg_policy,3256)
NameData polname; /* Policy name. */
Oid polrelid; /* Oid of the relation with policy. */
char polcmd; /* One of ACL_*_CHR, or '*' for all */
+ bool polpermissive; /* restrictive or permissive policy */
#ifdef CATALOG_VARLEN
Oid polroles[1]; /* Roles associated with policy, not-NULL */
@@ -42,12 +43,13 @@ typedef FormData_pg_policy *Form_pg_policy;
* compiler constants for pg_policy
* ----------------
*/
-#define Natts_pg_policy 6
-#define Anum_pg_policy_polname 1
-#define Anum_pg_policy_polrelid 2
-#define Anum_pg_policy_polcmd 3
-#define Anum_pg_policy_polroles 4
-#define Anum_pg_policy_polqual 5
-#define Anum_pg_policy_polwithcheck 6
+#define Natts_pg_policy 7
+#define Anum_pg_policy_polname 1
+#define Anum_pg_policy_polrelid 2
+#define Anum_pg_policy_polcmd 3
+#define Anum_pg_policy_polpermissive 4
+#define Anum_pg_policy_polroles 5
+#define Anum_pg_policy_polqual 6
+#define Anum_pg_policy_polwithcheck 7
#endif /* PG_POLICY_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 04b1c2f2d4..f8003e46f3 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2077,6 +2077,7 @@ typedef struct CreatePolicyStmt
char *policy_name; /* Policy's name */
RangeVar *table; /* the table name the policy applies to */
char *cmd_name; /* the command name the policy applies to */
+ bool permissive; /* restrictive or permissive policy */
List *roles; /* the roles associated with the policy */
Node *qual; /* the policy's condition */
Node *with_check; /* the policy's WITH CHECK condition. */
diff --git a/src/include/rewrite/rowsecurity.h b/src/include/rewrite/rowsecurity.h
index fd0cbaff59..2f3db8cf41 100644
--- a/src/include/rewrite/rowsecurity.h
+++ b/src/include/rewrite/rowsecurity.h
@@ -22,6 +22,7 @@ typedef struct RowSecurityPolicy
char *policy_name; /* Name of the policy */
char polcmd; /* Type of command policy is for */
ArrayType *roles; /* Array of roles policy is for */
+ bool permissive; /* restrictive or permissive policy */
Expr *qual; /* Expression to filter rows */
Expr *with_check_qual; /* Expression to limit rows allowed */
bool hassublinks; /* If either expression has sublinks */
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index abfee92f4d..471e405c7a 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -7,6 +7,7 @@ SET client_min_messages TO 'warning';
DROP USER IF EXISTS regress_rls_alice;
DROP USER IF EXISTS regress_rls_bob;
DROP USER IF EXISTS regress_rls_carol;
+DROP USER IF EXISTS regress_rls_dave;
DROP USER IF EXISTS regress_rls_exempt_user;
DROP ROLE IF EXISTS regress_rls_group1;
DROP ROLE IF EXISTS regress_rls_group2;
@@ -16,6 +17,7 @@ RESET client_min_messages;
CREATE USER regress_rls_alice NOLOGIN;
CREATE USER regress_rls_bob NOLOGIN;
CREATE USER regress_rls_carol NOLOGIN;
+CREATE USER regress_rls_dave NOLOGIN;
CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN;
CREATE ROLE regress_rls_group1 NOLOGIN;
CREATE ROLE regress_rls_group2 NOLOGIN;
@@ -67,11 +69,84 @@ INSERT INTO document VALUES
( 5, 44, 2, 'regress_rls_bob', 'my second manga'),
( 6, 22, 1, 'regress_rls_carol', 'great science fiction'),
( 7, 33, 2, 'regress_rls_carol', 'great technology book'),
- ( 8, 44, 1, 'regress_rls_carol', 'great manga');
+ ( 8, 44, 1, 'regress_rls_carol', 'great manga'),
+ ( 9, 22, 1, 'regress_rls_dave', 'awesome science fiction'),
+ (10, 33, 2, 'regress_rls_dave', 'awesome technology book');
ALTER TABLE document ENABLE ROW LEVEL SECURITY;
-- user's security level must be higher than or equal to document's
-CREATE POLICY p1 ON document
+CREATE POLICY p1 ON document AS PERMISSIVE
USING (dlevel <= (SELECT seclv FROM uaccount WHERE pguser = current_user));
+-- try to create a policy of bogus type
+CREATE POLICY p1 ON document AS UGLY
+ USING (dlevel <= (SELECT seclv FROM uaccount WHERE pguser = current_user));
+ERROR: unrecognized row security option "ugly"
+LINE 1: CREATE POLICY p1 ON document AS UGLY
+ ^
+HINT: Only PERMISSIVE or RESTRICTIVE policies are supported currently.
+-- but Dave isn't allowed to do anything at cid 50 or above
+-- this is to make sure that we sort the policies by name first
+-- when applying WITH CHECK, a later INSERT by Dave should fail due
+-- to p1r first
+CREATE POLICY p2r ON document AS RESTRICTIVE TO regress_rls_dave
+ USING (cid <> 44 AND cid < 50);
+-- and Dave isn't allowed to see manga documents
+CREATE POLICY p1r ON document AS RESTRICTIVE TO regress_rls_dave
+ USING (cid <> 44);
+\dp
+ Access privileges
+ Schema | Name | Type | Access privileges | Column privileges | Policies
+--------------------+----------+-------+---------------------------------------------+-------------------+--------------------------------------------
+ regress_rls_schema | category | table | regress_rls_alice=arwdDxt/regress_rls_alice+| |
+ | | | =arwdDxt/regress_rls_alice | |
+ regress_rls_schema | document | table | regress_rls_alice=arwdDxt/regress_rls_alice+| | p1: +
+ | | | =arwdDxt/regress_rls_alice | | (u): (dlevel <= ( SELECT uaccount.seclv +
+ | | | | | FROM uaccount +
+ | | | | | WHERE (uaccount.pguser = CURRENT_USER)))+
+ | | | | | p2r (RESTRICTIVE): +
+ | | | | | (u): ((cid <> 44) AND (cid < 50)) +
+ | | | | | to: regress_rls_dave +
+ | | | | | p1r (RESTRICTIVE): +
+ | | | | | (u): (cid <> 44) +
+ | | | | | to: regress_rls_dave
+ regress_rls_schema | uaccount | table | regress_rls_alice=arwdDxt/regress_rls_alice+| |
+ | | | =r/regress_rls_alice | |
+(3 rows)
+
+\d document
+ Table "regress_rls_schema.document"
+ Column | Type | Collation | Nullable | Default
+---------+---------+-----------+----------+---------
+ did | integer | | not null |
+ cid | integer | | |
+ dlevel | integer | | not null |
+ dauthor | name | | |
+ dtitle | text | | |
+Indexes:
+ "document_pkey" PRIMARY KEY, btree (did)
+Foreign-key constraints:
+ "document_cid_fkey" FOREIGN KEY (cid) REFERENCES category(cid)
+Policies:
+ POLICY "p1"
+ USING ((dlevel <= ( SELECT uaccount.seclv
+ FROM uaccount
+ WHERE (uaccount.pguser = CURRENT_USER))))
+ POLICY "p1r" AS RESTRICTIVE
+ TO regress_rls_dave
+ USING ((cid <> 44))
+ POLICY "p2r" AS RESTRICTIVE
+ TO regress_rls_dave
+ USING (((cid <> 44) AND (cid < 50)))
+
+SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename = 'document' ORDER BY policyname;
+ schemaname | tablename | policyname | permissive | roles | cmd | qual | with_check
+--------------------+-----------+------------+-------------+--------------------+-----+--------------------------------------------+------------
+ regress_rls_schema | document | p1 | PERMISSIVE | {public} | ALL | (dlevel <= ( SELECT uaccount.seclv +|
+ | | | | | | FROM uaccount +|
+ | | | | | | WHERE (uaccount.pguser = CURRENT_USER))) |
+ regress_rls_schema | document | p1r | RESTRICTIVE | {regress_rls_dave} | ALL | (cid <> 44) |
+ regress_rls_schema | document | p2r | RESTRICTIVE | {regress_rls_dave} | ALL | ((cid <> 44) AND (cid < 50)) |
+(3 rows)
+
-- viewpoint from regress_rls_bob
SET SESSION AUTHORIZATION regress_rls_bob;
SET row_security TO ON;
@@ -80,26 +155,30 @@ NOTICE: f_leak => my first novel
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great manga
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+NOTICE: f_leak => awesome science fiction
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
4 | 44 | 1 | regress_rls_bob | my first manga
6 | 22 | 1 | regress_rls_carol | great science fiction
8 | 44 | 1 | regress_rls_carol | great manga
-(4 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+(5 rows)
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first novel
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great manga
- cid | did | dlevel | dauthor | dtitle | cname
------+-----+--------+-------------------+-----------------------+-----------------
- 11 | 1 | 1 | regress_rls_bob | my first novel | novel
- 44 | 4 | 1 | regress_rls_bob | my first manga | manga
- 22 | 6 | 1 | regress_rls_carol | great science fiction | science fiction
- 44 | 8 | 1 | regress_rls_carol | great manga | manga
-(4 rows)
+NOTICE: f_leak => awesome science fiction
+ cid | did | dlevel | dauthor | dtitle | cname
+-----+-----+--------+-------------------+-------------------------+-----------------
+ 11 | 1 | 1 | regress_rls_bob | my first novel | novel
+ 44 | 4 | 1 | regress_rls_bob | my first manga | manga
+ 22 | 6 | 1 | regress_rls_carol | great science fiction | science fiction
+ 44 | 8 | 1 | regress_rls_carol | great manga | manga
+ 22 | 9 | 1 | regress_rls_dave | awesome science fiction | science fiction
+(5 rows)
-- try a sampled version
SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
@@ -107,12 +186,14 @@ SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
NOTICE: f_leak => my first manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great manga
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+NOTICE: f_leak => awesome science fiction
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
4 | 44 | 1 | regress_rls_bob | my first manga
6 | 22 | 1 | regress_rls_carol | great science fiction
8 | 44 | 1 | regress_rls_carol | great manga
-(3 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+(4 rows)
-- viewpoint from regress_rls_carol
SET SESSION AUTHORIZATION regress_rls_carol;
@@ -125,8 +206,10 @@ NOTICE: f_leak => my second manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great technology book
NOTICE: f_leak => great manga
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+NOTICE: f_leak => awesome science fiction
+NOTICE: f_leak => awesome technology book
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
2 | 11 | 2 | regress_rls_bob | my second novel
3 | 22 | 2 | regress_rls_bob | my science fiction
@@ -135,7 +218,9 @@ NOTICE: f_leak => great manga
6 | 22 | 1 | regress_rls_carol | great science fiction
7 | 33 | 2 | regress_rls_carol | great technology book
8 | 44 | 1 | regress_rls_carol | great manga
-(8 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+(10 rows)
SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
NOTICE: f_leak => my first novel
@@ -146,17 +231,21 @@ NOTICE: f_leak => my second manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great technology book
NOTICE: f_leak => great manga
- cid | did | dlevel | dauthor | dtitle | cname
------+-----+--------+-------------------+-----------------------+-----------------
- 11 | 1 | 1 | regress_rls_bob | my first novel | novel
- 11 | 2 | 2 | regress_rls_bob | my second novel | novel
- 22 | 3 | 2 | regress_rls_bob | my science fiction | science fiction
- 44 | 4 | 1 | regress_rls_bob | my first manga | manga
- 44 | 5 | 2 | regress_rls_bob | my second manga | manga
- 22 | 6 | 1 | regress_rls_carol | great science fiction | science fiction
- 33 | 7 | 2 | regress_rls_carol | great technology book | technology
- 44 | 8 | 1 | regress_rls_carol | great manga | manga
-(8 rows)
+NOTICE: f_leak => awesome science fiction
+NOTICE: f_leak => awesome technology book
+ cid | did | dlevel | dauthor | dtitle | cname
+-----+-----+--------+-------------------+-------------------------+-----------------
+ 11 | 1 | 1 | regress_rls_bob | my first novel | novel
+ 11 | 2 | 2 | regress_rls_bob | my second novel | novel
+ 22 | 3 | 2 | regress_rls_bob | my science fiction | science fiction
+ 44 | 4 | 1 | regress_rls_bob | my first manga | manga
+ 44 | 5 | 2 | regress_rls_bob | my second manga | manga
+ 22 | 6 | 1 | regress_rls_carol | great science fiction | science fiction
+ 33 | 7 | 2 | regress_rls_carol | great technology book | technology
+ 44 | 8 | 1 | regress_rls_carol | great manga | manga
+ 22 | 9 | 1 | regress_rls_dave | awesome science fiction | science fiction
+ 33 | 10 | 2 | regress_rls_dave | awesome technology book | technology
+(10 rows)
-- try a sampled version
SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
@@ -165,13 +254,15 @@ NOTICE: f_leak => my first manga
NOTICE: f_leak => my second manga
NOTICE: f_leak => great science fiction
NOTICE: f_leak => great manga
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+NOTICE: f_leak => awesome science fiction
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
4 | 44 | 1 | regress_rls_bob | my first manga
5 | 44 | 2 | regress_rls_bob | my second manga
6 | 22 | 1 | regress_rls_carol | great science fiction
8 | 44 | 1 | regress_rls_carol | great manga
-(4 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+(5 rows)
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
QUERY PLAN
@@ -201,6 +292,81 @@ EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dt
Index Cond: (pguser = CURRENT_USER)
(11 rows)
+-- viewpoint from regress_rls_dave
+SET SESSION AUTHORIZATION regress_rls_dave;
+SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
+NOTICE: f_leak => my first novel
+NOTICE: f_leak => my second novel
+NOTICE: f_leak => my science fiction
+NOTICE: f_leak => great science fiction
+NOTICE: f_leak => great technology book
+NOTICE: f_leak => awesome science fiction
+NOTICE: f_leak => awesome technology book
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
+ 1 | 11 | 1 | regress_rls_bob | my first novel
+ 2 | 11 | 2 | regress_rls_bob | my second novel
+ 3 | 22 | 2 | regress_rls_bob | my science fiction
+ 6 | 22 | 1 | regress_rls_carol | great science fiction
+ 7 | 33 | 2 | regress_rls_carol | great technology book
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+(7 rows)
+
+SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
+NOTICE: f_leak => my first novel
+NOTICE: f_leak => my second novel
+NOTICE: f_leak => my science fiction
+NOTICE: f_leak => great science fiction
+NOTICE: f_leak => great technology book
+NOTICE: f_leak => awesome science fiction
+NOTICE: f_leak => awesome technology book
+ cid | did | dlevel | dauthor | dtitle | cname
+-----+-----+--------+-------------------+-------------------------+-----------------
+ 11 | 1 | 1 | regress_rls_bob | my first novel | novel
+ 11 | 2 | 2 | regress_rls_bob | my second novel | novel
+ 22 | 3 | 2 | regress_rls_bob | my science fiction | science fiction
+ 22 | 6 | 1 | regress_rls_carol | great science fiction | science fiction
+ 33 | 7 | 2 | regress_rls_carol | great technology book | technology
+ 22 | 9 | 1 | regress_rls_dave | awesome science fiction | science fiction
+ 33 | 10 | 2 | regress_rls_dave | awesome technology book | technology
+(7 rows)
+
+EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
+ QUERY PLAN
+---------------------------------------------------------------------------------
+ Subquery Scan on document
+ Filter: f_leak(document.dtitle)
+ -> Seq Scan on document document_1
+ Filter: ((cid <> 44) AND (cid <> 44) AND (cid < 50) AND (dlevel <= $0))
+ InitPlan 1 (returns $0)
+ -> Index Scan using uaccount_pkey on uaccount
+ Index Cond: (pguser = CURRENT_USER)
+(7 rows)
+
+EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Hash Join
+ Hash Cond: (category.cid = document.cid)
+ -> Seq Scan on category
+ -> Hash
+ -> Subquery Scan on document
+ Filter: f_leak(document.dtitle)
+ -> Seq Scan on document document_1
+ Filter: ((cid <> 44) AND (cid <> 44) AND (cid < 50) AND (dlevel <= $0))
+ InitPlan 1 (returns $0)
+ -> Index Scan using uaccount_pkey on uaccount
+ Index Cond: (pguser = CURRENT_USER)
+(11 rows)
+
+-- 44 would technically fail for both p2r and p1r, but we should get an error
+-- back from p1r for this because it sorts first
+INSERT INTO document VALUES (100, 44, 1, 'regress_rls_dave', 'testing sorting of policies'); -- fail
+ERROR: new row violates row-level security policy "p1r" for table "document"
+-- Just to see a p2r error
+INSERT INTO document VALUES (100, 55, 1, 'regress_rls_dave', 'testing sorting of policies'); -- fail
+ERROR: new row violates row-level security policy "p2r" for table "document"
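-- For readers following the test above: a hedged, minimal sketch (not part
-- of the regression suite; table and policy names are hypothetical) of how
-- permissive and restrictive policies combine. Any one permissive policy
-- suffices, but every restrictive policy must also pass.
CREATE TABLE notes (id int, owner name, body text);
ALTER TABLE notes ENABLE ROW LEVEL SECURITY;
-- Permissive policies are ORed together.
CREATE POLICY sel_own ON notes AS PERMISSIVE USING (owner = current_user);
-- Restrictive policies are ANDed on top of that.
CREATE POLICY no_drafts ON notes AS RESTRICTIVE USING (id > 0);
-- A row is visible only if (owner = current_user) AND (id > 0).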
-- only owner can change policies
ALTER POLICY p1 ON document USING (true); --fail
ERROR: must be owner of relation document
@@ -318,7 +484,7 @@ SELECT * FROM document d FULL OUTER JOIN category c on d.cid = c.cid;
7 | 33 | 2 | regress_rls_carol | great technology book | |
(3 rows)
-INSERT INTO document VALUES (10, 33, 1, current_user, 'hoge');
+INSERT INTO document VALUES (11, 33, 1, current_user, 'hoge');
-- UNIQUE or PRIMARY KEY constraint violation DOES reveal presence of row
SET SESSION AUTHORIZATION regress_rls_bob;
INSERT INTO document VALUES (8, 44, 1, 'regress_rls_bob', 'my third manga'); -- Must fail with unique violation, revealing presence of did we can't see
@@ -337,8 +503,8 @@ ERROR: new row violates row-level security policy for table "document"
RESET SESSION AUTHORIZATION;
SET row_security TO ON;
SELECT * FROM document;
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
2 | 11 | 2 | regress_rls_bob | my second novel
3 | 22 | 2 | regress_rls_bob | my science fiction
@@ -347,8 +513,10 @@ SELECT * FROM document;
6 | 22 | 1 | regress_rls_carol | great science fiction
7 | 33 | 2 | regress_rls_carol | great technology book
8 | 44 | 1 | regress_rls_carol | great manga
- 10 | 33 | 1 | regress_rls_carol | hoge
-(9 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+ 11 | 33 | 1 | regress_rls_carol | hoge
+(11 rows)
SELECT * FROM category;
cid | cname
@@ -363,8 +531,8 @@ SELECT * FROM category;
RESET SESSION AUTHORIZATION;
SET row_security TO OFF;
SELECT * FROM document;
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
2 | 11 | 2 | regress_rls_bob | my second novel
3 | 22 | 2 | regress_rls_bob | my science fiction
@@ -373,8 +541,10 @@ SELECT * FROM document;
6 | 22 | 1 | regress_rls_carol | great science fiction
7 | 33 | 2 | regress_rls_carol | great technology book
8 | 44 | 1 | regress_rls_carol | great manga
- 10 | 33 | 1 | regress_rls_carol | hoge
-(9 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+ 11 | 33 | 1 | regress_rls_carol | hoge
+(11 rows)
SELECT * FROM category;
cid | cname
@@ -389,8 +559,8 @@ SELECT * FROM category;
SET SESSION AUTHORIZATION regress_rls_exempt_user;
SET row_security TO OFF;
SELECT * FROM document;
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
2 | 11 | 2 | regress_rls_bob | my second novel
3 | 22 | 2 | regress_rls_bob | my science fiction
@@ -399,8 +569,10 @@ SELECT * FROM document;
6 | 22 | 1 | regress_rls_carol | great science fiction
7 | 33 | 2 | regress_rls_carol | great technology book
8 | 44 | 1 | regress_rls_carol | great manga
- 10 | 33 | 1 | regress_rls_carol | hoge
-(9 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+ 11 | 33 | 1 | regress_rls_carol | hoge
+(11 rows)
SELECT * FROM category;
cid | cname
@@ -415,8 +587,8 @@ SELECT * FROM category;
SET SESSION AUTHORIZATION regress_rls_alice;
SET row_security TO ON;
SELECT * FROM document;
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
2 | 11 | 2 | regress_rls_bob | my second novel
3 | 22 | 2 | regress_rls_bob | my science fiction
@@ -425,8 +597,10 @@ SELECT * FROM document;
6 | 22 | 1 | regress_rls_carol | great science fiction
7 | 33 | 2 | regress_rls_carol | great technology book
8 | 44 | 1 | regress_rls_carol | great manga
- 10 | 33 | 1 | regress_rls_carol | hoge
-(9 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+ 11 | 33 | 1 | regress_rls_carol | hoge
+(11 rows)
SELECT * FROM category;
cid | cname
@@ -441,8 +615,8 @@ SELECT * FROM category;
SET SESSION AUTHORIZATION regress_rls_alice;
SET row_security TO OFF;
SELECT * FROM document;
- did | cid | dlevel | dauthor | dtitle
------+-----+--------+-------------------+-----------------------
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-------------------------
1 | 11 | 1 | regress_rls_bob | my first novel
2 | 11 | 2 | regress_rls_bob | my second novel
3 | 22 | 2 | regress_rls_bob | my science fiction
@@ -451,8 +625,10 @@ SELECT * FROM document;
6 | 22 | 1 | regress_rls_carol | great science fiction
7 | 33 | 2 | regress_rls_carol | great technology book
8 | 44 | 1 | regress_rls_carol | great manga
- 10 | 33 | 1 | regress_rls_carol | hoge
-(9 rows)
+ 9 | 22 | 1 | regress_rls_dave | awesome science fiction
+ 10 | 33 | 2 | regress_rls_dave | awesome technology book
+ 11 | 33 | 1 | regress_rls_carol | hoge
+(11 rows)
SELECT * FROM category;
cid | cname
@@ -1517,6 +1693,7 @@ SELECT * FROM b1;
--
SET SESSION AUTHORIZATION regress_rls_alice;
DROP POLICY p1 ON document;
+DROP POLICY p1r ON document;
CREATE POLICY p1 ON document FOR SELECT USING (true);
CREATE POLICY p2 ON document FOR INSERT WITH CHECK (dauthor = current_user);
CREATE POLICY p3 ON document FOR UPDATE
@@ -3461,6 +3638,7 @@ RESET client_min_messages;
DROP USER regress_rls_alice;
DROP USER regress_rls_bob;
DROP USER regress_rls_carol;
+DROP USER regress_rls_dave;
DROP USER regress_rls_exempt_user;
DROP ROLE regress_rls_group1;
DROP ROLE regress_rls_group2;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 031e8c2ef5..a8f35a76fa 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1379,6 +1379,10 @@ pg_matviews| SELECT n.nspname AS schemaname,
pg_policies| SELECT n.nspname AS schemaname,
c.relname AS tablename,
pol.polname AS policyname,
+ CASE
+ WHEN pol.polpermissive THEN 'PERMISSIVE'::text
+ ELSE 'RESTRICTIVE'::text
+ END AS permissive,
CASE
WHEN (pol.polroles = '{0}'::oid[]) THEN (string_to_array('public'::text, ''::text))::name[]
ELSE ARRAY( SELECT pg_authid.rolname
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index 7fcefe4502..5e2f4ef884 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -10,6 +10,7 @@ SET client_min_messages TO 'warning';
DROP USER IF EXISTS regress_rls_alice;
DROP USER IF EXISTS regress_rls_bob;
DROP USER IF EXISTS regress_rls_carol;
+DROP USER IF EXISTS regress_rls_dave;
DROP USER IF EXISTS regress_rls_exempt_user;
DROP ROLE IF EXISTS regress_rls_group1;
DROP ROLE IF EXISTS regress_rls_group2;
@@ -22,6 +23,7 @@ RESET client_min_messages;
CREATE USER regress_rls_alice NOLOGIN;
CREATE USER regress_rls_bob NOLOGIN;
CREATE USER regress_rls_carol NOLOGIN;
+CREATE USER regress_rls_dave NOLOGIN;
CREATE USER regress_rls_exempt_user BYPASSRLS NOLOGIN;
CREATE ROLE regress_rls_group1 NOLOGIN;
CREATE ROLE regress_rls_group2 NOLOGIN;
@@ -80,14 +82,35 @@ INSERT INTO document VALUES
( 5, 44, 2, 'regress_rls_bob', 'my second manga'),
( 6, 22, 1, 'regress_rls_carol', 'great science fiction'),
( 7, 33, 2, 'regress_rls_carol', 'great technology book'),
- ( 8, 44, 1, 'regress_rls_carol', 'great manga');
+ ( 8, 44, 1, 'regress_rls_carol', 'great manga'),
+ ( 9, 22, 1, 'regress_rls_dave', 'awesome science fiction'),
+ (10, 33, 2, 'regress_rls_dave', 'awesome technology book');
ALTER TABLE document ENABLE ROW LEVEL SECURITY;
-- user's security level must be higher than or equal to document's
-CREATE POLICY p1 ON document
+CREATE POLICY p1 ON document AS PERMISSIVE
USING (dlevel <= (SELECT seclv FROM uaccount WHERE pguser = current_user));
+-- try to create a policy of bogus type
+CREATE POLICY p1 ON document AS UGLY
+ USING (dlevel <= (SELECT seclv FROM uaccount WHERE pguser = current_user));
+
+-- but Dave isn't allowed to do anything at cid 50 or above
+-- this is to make sure that we sort the policies by name first;
+-- when applying WITH CHECK, a later INSERT by Dave should fail due
+-- to p1r first
+CREATE POLICY p2r ON document AS RESTRICTIVE TO regress_rls_dave
+ USING (cid <> 44 AND cid < 50);
+
+-- and Dave isn't allowed to see manga documents
+CREATE POLICY p1r ON document AS RESTRICTIVE TO regress_rls_dave
+ USING (cid <> 44);
+
+\dp
+\d document
+SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename = 'document' ORDER BY policyname;
+
-- viewpoint from regress_rls_bob
SET SESSION AUTHORIZATION regress_rls_bob;
SET row_security TO ON;
@@ -110,6 +133,20 @@ SELECT * FROM document TABLESAMPLE BERNOULLI(50) REPEATABLE(0)
EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
+-- viewpoint from regress_rls_dave
+SET SESSION AUTHORIZATION regress_rls_dave;
+SELECT * FROM document WHERE f_leak(dtitle) ORDER BY did;
+SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle) ORDER BY did;
+
+EXPLAIN (COSTS OFF) SELECT * FROM document WHERE f_leak(dtitle);
+EXPLAIN (COSTS OFF) SELECT * FROM document NATURAL JOIN category WHERE f_leak(dtitle);
+
+-- 44 would technically fail for both p2r and p1r, but we should get an error
+-- back from p1r for this because it sorts first
+INSERT INTO document VALUES (100, 44, 1, 'regress_rls_dave', 'testing sorting of policies'); -- fail
+-- Just to see a p2r error
+INSERT INTO document VALUES (100, 55, 1, 'regress_rls_dave', 'testing sorting of policies'); -- fail
+
-- only owner can change policies
ALTER POLICY p1 ON document USING (true); --fail
DROP POLICY p1 ON document; --fail
@@ -147,7 +184,7 @@ DELETE FROM category WHERE cid = 33; -- fails with FK violation
-- can insert FK referencing invisible PK
SET SESSION AUTHORIZATION regress_rls_carol;
SELECT * FROM document d FULL OUTER JOIN category c on d.cid = c.cid;
-INSERT INTO document VALUES (10, 33, 1, current_user, 'hoge');
+INSERT INTO document VALUES (11, 33, 1, current_user, 'hoge');
-- UNIQUE or PRIMARY KEY constraint violation DOES reveal presence of row
SET SESSION AUTHORIZATION regress_rls_bob;
@@ -517,6 +554,7 @@ SELECT * FROM b1;
SET SESSION AUTHORIZATION regress_rls_alice;
DROP POLICY p1 ON document;
+DROP POLICY p1r ON document;
CREATE POLICY p1 ON document FOR SELECT USING (true);
CREATE POLICY p2 ON document FOR INSERT WITH CHECK (dauthor = current_user);
@@ -1577,6 +1615,7 @@ RESET client_min_messages;
DROP USER regress_rls_alice;
DROP USER regress_rls_bob;
DROP USER regress_rls_carol;
+DROP USER regress_rls_dave;
DROP USER regress_rls_exempt_user;
DROP ROLE regress_rls_group1;
DROP ROLE regress_rls_group2;
--
cgit v1.2.3
From dfe530a09226a9de80f2b4c3d5f667bf51481c49 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Tue, 6 Dec 2016 17:09:10 +0900
Subject: Improve documentation about pg_stat_replication view.
Add the descriptions of possible values in "state" and "sync_state" columns
of pg_stat_replication view.
Author: Michael Paquier, slightly modified by me
Discussion:
---
doc/src/sgml/high-availability.sgml | 6 +++++
doc/src/sgml/monitoring.sgml | 51 +++++++++++++++++++++++++++++++++++--
2 files changed, 55 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 5bedaf27a2..6b89507c8c 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1161,6 +1161,10 @@ synchronous_standby_names = '2 (s1, s2, s3)'
s2> fails. s4> is an asynchronous standby since
its name is not in the list.
+
+ The synchronous states of standby servers can be viewed using
+ the pg_stat_replication view.
+
@@ -1235,6 +1239,8 @@ synchronous_standby_names = '2 (s1, s2, s3)'
will increase according to the length of time the standby has been down.
The standby is only able to become a synchronous standby
once it has reached streaming> state.
+ This state can be viewed using
+ the pg_stat_replication view.
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 02dab879d9..128ee13b5f 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1357,7 +1357,33 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
state>text>
- Current WAL sender state
+ Current WAL sender state.
+ Possible values are:
+
+
+
+ startup>: This WAL sender is starting up.
+
+
+
+
+ catchup>: This WAL sender's connected standby is
+ catching up with the primary.
+
+
+
+
+ streaming>: This WAL sender is streaming changes
+ after its connected standby server has caught up with the primary.
+
+
+
+
+ backup>: This WAL sender is sending a backup.
+
+
+
+ sent_location>
@@ -1391,7 +1417,28 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
sync_state>text>
- Synchronous state of this standby server
+ Synchronous state of this standby server.
+ Possible values are:
+
+
+
+ async>: This standby server is asynchronous.
+
+
+
+
+ potential>: This standby server is currently asynchronous,
+ but can potentially become synchronous if one of the current
+ synchronous standbys fails.
+
+
+
+
+ sync>: This standby server is synchronous.
+
+
+
+
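As a hedged illustration of the states documented above (a sketch, not part
of the patch; run on the primary, assuming at least one connected standby):

SELECT application_name, state, sync_state
FROM pg_stat_replication
ORDER BY application_name;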
--
cgit v1.2.3
From 4212cb73262bbdd164727beffa4c4744b4ead92d Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 6 Dec 2016 11:11:54 -0500
Subject: Fix interaction of parallel query with prepared statements.
Previously, a prepared statement created via a Parse message could get
a parallel plan, but one created with a PREPARE statement could not.
This state of affairs was due to confusion on my (rhaas) part: I
erroneously believed that a CREATE TABLE .. AS EXECUTE statement could
only be performed with a prepared statement by PREPARE, but in fact
one created by a Prepare message works just as well. Therefore, it
makes no sense to allow parallel query in one case but not the other.
To fix, allow parallel query with all prepared statements, but run
the parallel plan serially (i.e. without workers) in the case of
CREATE TABLE .. AS EXECUTE. Also, document this.
Amit Kapila and Tobias Bussman, plus an extra sentence of
documentation by me.
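As a hedged illustration of the behavior described above (a sketch, not part
of the patch; pgbench_accounts and the prepared-statement name are
assumptions, and whether a parallel plan is actually chosen depends on
costing and configuration):

PREPARE q AS SELECT count(*) FROM pgbench_accounts;
EXPLAIN EXECUTE q;            -- may now show a Gather node with workers
CREATE TABLE t AS EXECUTE q;  -- same plan, but executed without workers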
---
doc/src/sgml/parallel.sgml | 9 +++++++++
src/backend/commands/prepare.c | 2 +-
src/backend/executor/execMain.c | 7 ++++---
3 files changed, 14 insertions(+), 4 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index f39c21a455..cf4c1c9c2a 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -227,6 +227,15 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
+
+
+ A prepared statement is executed using a CREATE TABLE .. AS
+ EXECUTE .. statement. This construct converts what otherwise
+ would have been a read-only operation into a read-write operation,
+ making it ineligible for parallel query.
+
+
+
The transaction isolation level is serializable. This situation
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index cec37ce040..b01051df9d 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -159,7 +159,7 @@ PrepareQuery(PrepareStmt *stmt, const char *queryString)
nargs,
NULL,
NULL,
- 0, /* default cursor options */
+ CURSOR_OPT_PARALLEL_OK, /* allow parallel mode */
true); /* fixed result */
/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 32bb3f9205..71c07288a1 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1540,10 +1540,11 @@ ExecutePlan(EState *estate,
estate->es_direction = direction;
/*
- * If a tuple count was supplied, we must force the plan to run without
- * parallelism, because we might exit early.
+	 * If a tuple count was supplied or data is being written to a relation,
+	 * we must force the plan to run without parallelism, because we might
+	 * exit early.
*/
- if (numberTuples)
+ if (numberTuples || dest->mydest == DestIntoRel)
use_parallel_mode = false;
/*
--
cgit v1.2.3
From 18f8f784cbbf96ef77eb8943b466b26605824c14 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 7 Dec 2016 12:19:56 -0500
Subject: Handle empty or all-blank PAGER setting more sanely in psql.
If the PAGER environment variable is set but contains an empty string,
psql would pass it to "sh" which would silently exit, causing whatever
query output we were printing to vanish entirely. This is quite
mystifying; it took a long time for us to figure out that this was the
cause of Joseph Brenner's trouble report. Rather than allowing that
to happen, we should treat this as another way to specify "no pager".
(We could alternatively treat it as selecting the default pager, but
it seems more likely that the former is what the user meant to achieve
by setting PAGER this way.)
Nonempty, but all-white-space, PAGER values have the same behavior, and
it's pretty easy to test for that, so let's handle that case the same way.
Most other cases of faulty PAGER values will result in the shell printing
some kind of complaint to stderr, which should be enough to diagnose the
problem, so we don't need to work harder than this. (Note that there's
been an intentional decision not to be very chatty about apparent failure
returns from the pager process, since that may happen if, eg, the user
quits the pager with control-C or some such. I'd just as soon not start
splitting hairs about which exit codes might merit making our own report.)
libpq's old PQprint() function was already on board with ignoring empty
PAGER values, but for consistency, make it ignore all-white-space values
as well.
It's been like this a long time, so back-patch to all supported branches.
Discussion: https://postgr.es/m/CAFfgvXWLOE2novHzYjmQK8-J6TmHz42G8f3X0SORM44+stUGmw@mail.gmail.com
---
doc/src/sgml/ref/psql-ref.sgml | 5 +++--
src/fe_utils/print.c | 6 ++++++
src/interfaces/libpq/fe-print.c | 3 ++-
3 files changed, 11 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 261652a45f..991573121b 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -3801,8 +3801,9 @@ $endif
If the query results do not fit on the screen, they are piped
through this command. Typical values are
more or less. The default
- is platform-dependent. The use of the pager can be disabled by
- using the \pset command.
+ is platform-dependent. Use of the pager can be disabled by setting
+ PAGER to empty, or by using pager-related options of
+ the \pset command.
diff --git a/src/fe_utils/print.c b/src/fe_utils/print.c
index 1ec74f1790..5c5d285be5 100644
--- a/src/fe_utils/print.c
+++ b/src/fe_utils/print.c
@@ -2874,6 +2874,12 @@ PageOutput(int lines, const printTableOpt *topt)
pagerprog = getenv("PAGER");
if (!pagerprog)
pagerprog = DEFAULT_PAGER;
+ else
+ {
+ /* if PAGER is empty or all-white-space, don't use pager */
+ if (strspn(pagerprog, " \t\r\n") == strlen(pagerprog))
+ return stdout;
+ }
disable_sigpipe_trap();
pagerpipe = popen(pagerprog, "w");
if (pagerpipe)
diff --git a/src/interfaces/libpq/fe-print.c b/src/interfaces/libpq/fe-print.c
index c33dc42a83..e596a51408 100644
--- a/src/interfaces/libpq/fe-print.c
+++ b/src/interfaces/libpq/fe-print.c
@@ -166,8 +166,9 @@ PQprint(FILE *fout, const PGresult *res, const PQprintOpt *po)
screen_size.ws_col = 80;
#endif
pagerenv = getenv("PAGER");
+ /* if PAGER is unset, empty or all-white-space, don't use pager */
if (pagerenv != NULL &&
- pagerenv[0] != '\0' &&
+ strspn(pagerenv, " \t\r\n") != strlen(pagerenv) &&
!po->html3 &&
((po->expanded &&
nTups * (nFields + 1) >= screen_size.ws_row) ||
--
cgit v1.2.3
From f0e44751d7175fa3394da2c8f85e3ceb3cdbfe63 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Wed, 7 Dec 2016 13:17:43 -0500
Subject: Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on the
implicit partitioning constraints is an example of this, but it seems
likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
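As a hedged illustration of the tuple routing described above (a sketch, not
part of the patch; table and partition names are hypothetical):

CREATE TABLE events (ts date NOT NULL, payload text) PARTITION BY RANGE (ts);
CREATE TABLE events_2016 PARTITION OF events
    FOR VALUES FROM ('2016-01-01') TO ('2017-01-01');
INSERT INTO events VALUES ('2016-07-01', 'routed to events_2016');
INSERT INTO events VALUES ('2020-01-01', 'no partition: raises an error');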
---
doc/src/sgml/catalogs.sgml | 129 +-
doc/src/sgml/ref/alter_table.sgml | 117 +-
doc/src/sgml/ref/create_foreign_table.sgml | 26 +
doc/src/sgml/ref/create_table.sgml | 154 +++
src/backend/access/common/reloptions.c | 2 +
src/backend/catalog/Makefile | 4 +-
src/backend/catalog/aclchk.c | 2 +
src/backend/catalog/dependency.c | 10 +-
src/backend/catalog/heap.c | 270 +++-
src/backend/catalog/index.c | 4 +-
src/backend/catalog/objectaddress.c | 5 +-
src/backend/catalog/partition.c | 1917 ++++++++++++++++++++++++++++
src/backend/catalog/pg_constraint.c | 2 +-
src/backend/commands/analyze.c | 6 +-
src/backend/commands/copy.c | 174 ++-
src/backend/commands/createas.c | 2 +-
src/backend/commands/indexcmds.c | 24 +-
src/backend/commands/lockcmds.c | 2 +-
src/backend/commands/policy.c | 5 +-
src/backend/commands/seclabel.c | 3 +-
src/backend/commands/sequence.c | 5 +-
src/backend/commands/tablecmds.c | 1567 +++++++++++++++++++++--
src/backend/commands/trigger.c | 16 +-
src/backend/commands/typecmds.c | 3 +-
src/backend/commands/vacuum.c | 3 +-
src/backend/commands/view.c | 3 +-
src/backend/executor/execMain.c | 125 +-
src/backend/executor/nodeModifyTable.c | 154 ++-
src/backend/nodes/copyfuncs.c | 81 ++
src/backend/nodes/equalfuncs.c | 70 +
src/backend/nodes/nodeFuncs.c | 6 +
src/backend/nodes/outfuncs.c | 55 +
src/backend/nodes/readfuncs.c | 34 +
src/backend/optimizer/util/plancat.c | 20 +
src/backend/parser/analyze.c | 8 +
src/backend/parser/gram.y | 347 ++++-
src/backend/parser/parse_agg.c | 10 +
src/backend/parser/parse_expr.c | 5 +
src/backend/parser/parse_func.c | 3 +
src/backend/parser/parse_utilcmd.c | 326 ++++-
src/backend/rewrite/rewriteDefine.c | 3 +-
src/backend/rewrite/rewriteHandler.c | 3 +-
src/backend/rewrite/rowsecurity.c | 3 +-
src/backend/tcop/utility.c | 6 +-
src/backend/utils/adt/ruleutils.c | 241 ++++
src/backend/utils/cache/relcache.c | 364 +++++-
src/backend/utils/cache/syscache.c | 12 +
src/bin/pg_dump/common.c | 90 ++
src/bin/pg_dump/pg_dump.c | 186 ++-
src/bin/pg_dump/pg_dump.h | 14 +
src/bin/psql/describe.c | 146 ++-
src/bin/psql/tab-complete.c | 6 +-
src/include/catalog/catversion.h | 2 +-
src/include/catalog/dependency.h | 3 +-
src/include/catalog/heap.h | 11 +
src/include/catalog/indexing.h | 3 +
src/include/catalog/partition.h | 83 ++
src/include/catalog/pg_class.h | 23 +-
src/include/catalog/pg_partitioned_table.h | 76 ++
src/include/catalog/pg_proc.h | 2 +
src/include/commands/defrem.h | 2 +
src/include/commands/tablecmds.h | 2 +-
src/include/executor/executor.h | 6 +
src/include/nodes/execnodes.h | 14 +
src/include/nodes/nodes.h | 5 +
src/include/nodes/parsenodes.h | 79 +-
src/include/parser/kwlist.h | 2 +
src/include/parser/parse_node.h | 3 +-
src/include/parser/parse_utilcmd.h | 2 +
src/include/pg_config_manual.h | 5 +
src/include/utils/builtins.h | 1 +
src/include/utils/rel.h | 89 ++
src/include/utils/syscache.h | 1 +
src/test/regress/expected/alter_table.out | 343 +++++
src/test/regress/expected/create_table.out | 413 ++++++
src/test/regress/expected/inherit.out | 272 ++++
src/test/regress/expected/insert.out | 140 ++
src/test/regress/expected/sanity_check.out | 1 +
src/test/regress/expected/update.out | 27 +
src/test/regress/sql/alter_table.sql | 294 +++++
src/test/regress/sql/create_table.sql | 315 +++++
src/test/regress/sql/inherit.sql | 52 +
src/test/regress/sql/insert.sql | 86 ++
src/test/regress/sql/update.sql | 21 +
src/tools/pgindent/typedefs.list | 6 +
85 files changed, 8886 insertions(+), 271 deletions(-)
create mode 100644 src/backend/catalog/partition.c
create mode 100644 src/include/catalog/partition.h
create mode 100644 src/include/catalog/pg_partitioned_table.h
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index c4246dcd86..9d2e89523d 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -225,6 +225,11 @@
template data for procedural languages
+
+ pg_partitioned_table
+ information about the partition key of tables
+
+
pg_policyrow-security policies
@@ -1723,7 +1728,8 @@
char
- r> = ordinary table, i> = index,
+ r> = ordinary table, P> = partitioned table,
+ i> = index,
S> = sequence, v> = view,
m> = materialized view,
c> = composite type, t> = TOAST table,
@@ -1839,6 +1845,13 @@
+
+ relispartition
+ bool
+
+ True if table is a partition
+
+
relfrozenxidxid
@@ -1885,6 +1898,16 @@
Access-method-specific options, as keyword=value> strings
+
+
+ relpartbound
+ pg_node_tree
+
+
+ If table is a partition (see relispartition),
+ internal representation of the partition bound
+
+
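As a hedged illustration of the two new pg_class columns (a sketch, not part
of the patch):

-- List each partition together with a readable form of its bound.
SELECT relname, pg_get_expr(relpartbound, oid) AS partition_bound
FROM pg_class
WHERE relispartition;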
@@ -4689,6 +4712,110 @@
+
+ pg_partitioned_table
+
+
+ pg_partitioned_table
+
+
+
+ The catalog pg_partitioned_table stores
+ information about how tables are partitioned.
+
+
+
+ pg_partitioned_table> Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+
+ partrelid
+ oid
+ pg_class.oid
+ The OID of the pg_class> entry for this partitioned table
+
+
+
+ partstrat
+ char
+
+
+ Partitioning strategy; l> = list partitioned table,
+ r> = range partitioned table
+
+
+
+
+ partnatts
+ int2
+
+ The number of columns in the partition key
+
+
+
+ partattrs
+ int2vector
+ pg_attribute.attnum
+
+ This is an array of partnatts values that
+ indicate which table columns are part of the partition key. For
+ example, a value of 1 3 would mean that the first
+ and the third table columns make up the partition key. A zero in this
+ array indicates that the corresponding partition key column is an
+ expression, rather than a simple column reference.
+
+
+
+
+ partclass
+ oidvector
+ pg_opclass.oid
+
+ For each column in the partition key, this contains the OID of the
+ operator class to use. See
+ pg_opclass for details.
+
+
+
+
+ partcollation
+ oidvector
+ pg_collation.oid
+
+ For each column in the partition key, this contains the OID of the
+ collation to use for partitioning.
+
+
+
+
+ partexprs
+ pg_node_tree
+
+
+ Expression trees (in nodeToString()
+ representation) for partition key columns that are not simple column
+ references. This is a list with one element for each zero
+ entry in partattrs>. Null if all partition key columns
+ are simple references.
+
+
+
+
+
+
+
+
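As a hedged illustration of this catalog (a sketch, not part of the patch):

-- Show each partitioned table with its strategy ('r' = range, 'l' = list)
-- and the number of partition key columns.
SELECT c.relname, p.partstrat, p.partnatts
FROM pg_partitioned_table p
JOIN pg_class c ON c.oid = p.partrelid;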
pg_policy
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index e48ccf21e4..a6a43c4b30 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -33,6 +33,10 @@ ALTER TABLE [ IF EXISTS ] name
SET SCHEMA new_schema
ALTER TABLE ALL IN TABLESPACE name [ OWNED BY role_name [, ... ] ]
SET TABLESPACE new_tablespace [ NOWAIT ]
+ALTER TABLE [ IF EXISTS ] name
+ ATTACH PARTITION partition_name FOR VALUES partition_bound_spec
+ALTER TABLE [ IF EXISTS ] name
+ DETACH PARTITION partition_namewhere action is one of:
@@ -166,6 +170,12 @@ ALTER TABLE ALL IN TABLESPACE name
values or to reject null values. You can only use SET
NOT NULL> when the column contains no null values.
+
+
+ If this table is a partition, one cannot perform DROP NOT NULL>
+ on a column if it is marked NOT NULL in the parent
+ table.
+
@@ -704,13 +714,63 @@ ALTER TABLE ALL IN TABLESPACE name
+
+ ATTACH PARTITIONpartition_namepartition_bound_spec
+
+
+ This form attaches an existing table (which might itself be partitioned)
+ as a partition of the target table using the same syntax for
+ partition_bound_spec as
+ . The partition bound specification
+ must correspond to the partitioning strategy and partition key of the
+ target table. The table to be attached must have all the same columns
+ as the target table and no more; moreover, the column types must also
+ match. Also, it must have all the NOT NULL and
+ CHECK constraints of the target table. Currently
+ UNIQUE, PRIMARY KEY, and
+ FOREIGN KEY constraints are not considered.
+ If any of the CHECK constraints of the table being
+ attached is marked NO INHERIT, the command will fail;
+ such a constraint must be recreated without the NO INHERIT
+ clause.
+
+
+
+      A full table scan is performed on the table being attached to check that
+      no existing row in the table violates the partition constraint. It is
+      possible to avoid this scan by adding, before running this command, a
+      valid CHECK constraint to the table that allows only
+      rows satisfying the desired partition constraint. Using such a
+      constraint, the system can determine that the table need not be scanned
+      to validate the partition constraint (a sketch of this technique appears
+      after these descriptions). This does not work, however, if
+      any of the partition keys is an expression and the partition does not
+      accept NULL values. If attaching a list partition
+      that will not accept NULL values, also add a
+      NOT NULL constraint to the partition key column,
+      unless it's an expression.
+
+
+
+
+
+ DETACH PARTITIONpartition_name
+
+
+      This form detaches the specified partition of the target table. The detached
+ partition continues to exist as a standalone table, but no longer has any
+ ties to the table from which it was detached.
+
+
+
+
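As sketched here, the scan-avoidance technique for ATTACH PARTITION
described above (a hedged sketch, not part of the patch; it assumes the
measurement table from the CREATE TABLE examples in this patch, and the
staging-table name is hypothetical):

CREATE TABLE measurement_y2016m07_stage (LIKE measurement);
ALTER TABLE measurement_y2016m07_stage
    ADD CONSTRAINT stage_range CHECK
        (logdate >= '2016-07-01' AND logdate < '2016-08-01');
-- With the CHECK constraint in place, the full scan can be skipped.
ALTER TABLE measurement
    ATTACH PARTITION measurement_y2016m07_stage
    FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');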
All the actions except RENAME,
- SET TABLESPACE and SET SCHEMA
- can be combined into
+ SET TABLESPACE, SET SCHEMA,
+ ATTACH PARTITION, and
+ DETACH PARTITION can be combined into
a list of multiple alterations to apply in parallel. For example, it
is possible to add several columns and/or alter the type of several
columns in a single command. This is particularly useful with large
@@ -721,8 +781,9 @@ ALTER TABLE ALL IN TABLESPACE name
You must own the table to use ALTER TABLE>.
To change the schema or tablespace of a table, you must also have
CREATE privilege on the new schema or tablespace.
- To add the table as a new child of a parent table, you must own the
- parent table as well.
+ To add the table as a new child of a parent table, you must own the parent
+ table as well. Also, to attach a table as a new partition of the table,
+ you must own the table being attached.
To alter the owner, you must also be a direct or indirect member of the new
owning role, and that role must have CREATE privilege on
the table's schema. (These restrictions enforce that altering the owner
@@ -938,6 +999,25 @@ ALTER TABLE ALL IN TABLESPACE name
+
+ partition_name
+
+
+ The name of the table to attach as a new partition or to detach from this table.
+
+
+
+
+
+ partition_bound_spec
+
+
+ The partition bound specification for a new partition. Refer to
+ for more details on its syntax.
+
+
+
+
@@ -977,6 +1057,11 @@ ALTER TABLE ALL IN TABLESPACE name
but does not require a table rewrite.
+
+ Similarly, when attaching a new partition it may be scanned to verify that
+ existing rows meet the partition constraint.
+
+
The main reason for providing the option to specify multiple changes
in a single ALTER TABLE> is that multiple table scans or
@@ -1047,6 +1132,9 @@ ALTER TABLE ALL IN TABLESPACE name
COLUMN (i.e., ALTER TABLE ONLY ... DROP
COLUMN) never removes any descendant columns, but
instead marks them as independently defined rather than inherited.
+ A nonrecursive DROP COLUMN command will fail for a
+ partitioned table, because all partitions of a table must have the same
+ columns as the partitioning root.
@@ -1233,6 +1321,27 @@ ALTER TABLE distributors DROP CONSTRAINT distributors_pkey,
ADD CONSTRAINT distributors_pkey PRIMARY KEY USING INDEX dist_id_temp_idx;
+
+ Attach a partition to a range partitioned table:
+
+ALTER TABLE measurement
+ ATTACH PARTITION measurement_y2016m07 FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+
+
+
+ Attach a partition to a list partitioned table:
+
+ALTER TABLE cities
+ ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco');
+
+
+
+ Detach a partition from a partitioned table:
+
+ALTER TABLE measurement
+ DETACH PARTITION measurement_y2015m12;
+
+
diff --git a/doc/src/sgml/ref/create_foreign_table.sgml b/doc/src/sgml/ref/create_foreign_table.sgml
index 413b033cb5..5d0dcf567b 100644
--- a/doc/src/sgml/ref/create_foreign_table.sgml
+++ b/doc/src/sgml/ref/create_foreign_table.sgml
@@ -27,6 +27,15 @@ CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name
SERVER server_name
[ OPTIONS ( option 'value' [, ... ] ) ]
+CREATE FOREIGN TABLE [ IF NOT EXISTS ] table_name
+ PARTITION OF parent_table [ (
+ { column_name WITH OPTIONS [ column_constraint [ ... ] ]
+ | table_constraint }
+ [, ... ]
+) ] partition_bound_spec
+ SERVER server_name
+[ OPTIONS ( option 'value' [, ... ] ) ]
+
where column_constraint is:
[ CONSTRAINT constraint_name ]
@@ -67,6 +76,12 @@ CHECK ( expression ) [ NO INHERIT ]
name as any existing data type in the same schema.
+
+ If the PARTITION OF clause is specified, the table is
+ created as a partition of parent_table with the
+ specified bounds.
+
+
To be able to create a foreign table, you must have USAGE
privilege on the foreign server, as well as USAGE
@@ -314,6 +329,17 @@ CREATE FOREIGN TABLE films (
SERVER film_server;
+
+ Create foreign table measurement_y2016m07>, which will be
+ accessed through the server server_07>, as a partition
+ of the range partitioned table measurement>:
+
+
+CREATE FOREIGN TABLE measurement_y2016m07
+ PARTITION OF measurement FOR VALUES FROM ('2016-07-01') TO ('2016-08-01')
+ SERVER server_07;
+
+
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index bf2ad64d66..8bf8af302b 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -28,6 +28,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
[, ... ]
] )
[ INHERITS ( parent_table [, ... ] ) ]
+[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ]
[ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
[ TABLESPACE tablespace_name ]
@@ -38,6 +39,18 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
| table_constraint }
[, ... ]
) ]
+[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ]
+[ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
+[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
+[ TABLESPACE tablespace_name ]
+
+CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXISTS ] table_name
+ PARTITION OF parent_table [ (
+ { column_name [ column_constraint [ ... ] ]
+ | table_constraint }
+ [, ... ]
+) ] FOR VALUES partition_bound_spec
+[ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ COLLATE collation ] [ opclass ] [, ... ] ) ]
[ WITH ( storage_parameter [= value] [, ... ] ) | WITH OIDS | WITHOUT OIDS ]
[ ON COMMIT { PRESERVE ROWS | DELETE ROWS | DROP } ]
[ TABLESPACE tablespace_name ]
@@ -70,6 +83,11 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
{ INCLUDING | EXCLUDING } { DEFAULTS | CONSTRAINTS | INDEXES | STORAGE | COMMENTS | ALL }
+and partition_bound_spec is:
+
+{ IN ( expression [, ...] ) |
+ FROM ( { expression | UNBOUNDED } [, ...] ) TO ( { expression | UNBOUNDED } [, ...] ) }
+
index_parameters in UNIQUE, PRIMARY KEY, and EXCLUDE constraints are:
[ WITH ( storage_parameter [= value] [, ... ] ) ]
@@ -229,6 +247,51 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
+
+ PARTITION OF parent_table
+
+
+ Creates the table as a partition of the specified
+ parent table.
+
+
+
+ The partition bound specification must correspond to the partitioning
+ method and partition key of the parent table, and must not overlap with
+ any existing partition of that parent.
+
+
+
+ A partition cannot have columns other than those inherited from the
+ parent. That includes the oid> column, which can be
+ specified using the WITH (OIDS) clause.
+ Defaults and constraints can optionally be specified for each of the
+ inherited columns. One can also specify table constraints in addition
+      to those inherited from the parent. If a check constraint whose name
+      matches one of the parent's constraints is specified, it is merged with
+      the latter, provided the specified condition is the same.
+
+
+
+ Rows inserted into a partitioned table will be automatically routed to
+ the correct partition. If no suitable partition exists, an error will
+ occur.
+
+
+
+ A partition must have the same column names and types as the table of
+ which it is a partition. Therefore, modifications to the column names
+ or types of the partitioned table will automatically propagate to all
+ children, as will operations such as TRUNCATE which normally affect a
+ table and all of its inheritance children. It is also possible to
+ TRUNCATE a partition individually, just as for an inheritance child.
+ Note that dropping a partition with DROP TABLE
+ requires taking an ACCESS EXCLUSIVE lock on the
+ parent table.
+
+
+
+
column_name
@@ -313,6 +376,46 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
+
+ PARTITION BY { RANGE | LIST } ( { column_name | ( expression ) } [ opclass ] [, ...] )
+
+
+ The optional PARTITION BY clause specifies a strategy
+ of partitioning the table. The table thus created is called a
+ partitioned table. The parenthesized list of
+ columns or expressions forms the partition key
+ for the table. When using range partitioning, the partition key can
+ include multiple columns or expressions, but for list partitioning, the
+ partition key must consist of a single column or expression. If no
+ btree operator class is specified when creating a partitioned table,
+ the default btree operator class for the datatype will be used. If
+ there is none, an error will be reported.
+
+
+
+ A partitioned table is divided into sub-tables (called partitions),
+ which are created using separate CREATE TABLE> commands.
+ The partitioned table is itself empty. A data row inserted into the
+ table is routed to a partition based on the value of columns or
+ expressions in the partition key. If no existing partition matches
+ the values in the new row, an error will be reported.
+
+
+
+ Partitioned tables do not support UNIQUE,
+ PRIMARY KEY, EXCLUDE, or
+ FOREIGN KEY constraints; however, you can define
+ these constraints on individual partitions.
+
+
+
+ When using range partitioning, a NOT NULL constraint
+ is added to each non-expression column in the partition key.
+
+
+
+
+
LIKE source_table [ like_option ... ]
@@ -1368,6 +1471,57 @@ CREATE TABLE employees OF employee_type (
PRIMARY KEY (name),
salary WITH OPTIONS DEFAULT 1000
);
+
+
+
+ Create a range partitioned table:
+
+CREATE TABLE measurement (
+ city_id int not null,
+ logdate date not null,
+ peaktemp int,
+ unitsales int
+) PARTITION BY RANGE (logdate);
+
+
+
+ Create a list partitioned table:
+
+CREATE TABLE cities (
+    city_id    bigserial not null,
+    name       text not null,
+    population int
+) PARTITION BY LIST (initcap(name));
+
+
+
+ Create partition of a range partitioned table:
+
+CREATE TABLE measurement_y2016m07
+ PARTITION OF measurement (
+ unitsales WITH OPTIONS DEFAULT 0
+) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
+
+
+
+ Create partition of a list partitioned table:
+
+CREATE TABLE cities_west
+ PARTITION OF cities (
+ CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco');
+
+
+
+ Create partition of a list partitioned table that is itself further
+ partitioned and then add a partition to it:
+
+CREATE TABLE cities_west
+ PARTITION OF cities (
+ CONSTRAINT city_id_nonzero CHECK (city_id != 0)
+) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population);
+
+CREATE TABLE cities_west_10000_to_100000
+ PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000);
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 83a97b06ab..34018cac7c 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -930,6 +930,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
options = heap_reloptions(classForm->relkind, datum, false);
break;
case RELKIND_VIEW:
@@ -1381,6 +1382,7 @@ heap_reloptions(char relkind, Datum reloptions, bool validate)
return (bytea *) rdopts;
case RELKIND_RELATION:
case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
return default_reloptions(reloptions, validate, RELOPT_KIND_HEAP);
default:
/* other relkinds are not supported */
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 1ce7610049..2d5ac09bec 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -11,7 +11,7 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
- objectaccess.o objectaddress.o pg_aggregate.o pg_collation.o \
+ objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \
pg_constraint.o pg_conversion.o \
pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
@@ -41,7 +41,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
- pg_collation.h pg_range.h pg_transform.h \
+ pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
toasting.h indexing.h \
)
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index c0df6710d1..3086021432 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -768,6 +768,8 @@ objectsInSchemaToOids(GrantObjectType objtype, List *nspnames)
objects = list_concat(objects, objs);
objs = getRelationsInNamespace(namespaceId, RELKIND_FOREIGN_TABLE);
objects = list_concat(objects, objs);
+ objs = getRelationsInNamespace(namespaceId, RELKIND_PARTITIONED_TABLE);
+ objects = list_concat(objects, objs);
break;
case ACL_OBJECT_SEQUENCE:
objs = getRelationsInNamespace(namespaceId, RELKIND_SEQUENCE);
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index b697e88ef0..0cdd1c5c6c 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1352,7 +1352,8 @@ void
recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
Node *expr, Oid relId,
DependencyType behavior,
- DependencyType self_behavior)
+ DependencyType self_behavior,
+ bool ignore_self)
{
find_expr_references_context context;
RangeTblEntry rte;
@@ -1407,9 +1408,10 @@ recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
context.addrs->numrefs = outrefs;
/* Record the self-dependencies */
- recordMultipleDependencies(depender,
- self_addrs->refs, self_addrs->numrefs,
- self_behavior);
+ if (!ignore_self)
+ recordMultipleDependencies(depender,
+ self_addrs->refs, self_addrs->numrefs,
+ self_behavior);
free_object_addresses(self_addrs);
}
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 0b804e7ac6..7f5bad0b5d 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -41,6 +41,7 @@
#include "catalog/heap.h"
#include "catalog/index.h"
#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
#include "catalog/pg_attrdef.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
@@ -48,6 +49,8 @@
#include "catalog/pg_foreign_table.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_namespace.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@@ -808,6 +811,7 @@ InsertPgClassTuple(Relation pg_class_desc,
values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
values[Anum_pg_class_relispopulated - 1] = BoolGetDatum(rd_rel->relispopulated);
values[Anum_pg_class_relreplident - 1] = CharGetDatum(rd_rel->relreplident);
+ values[Anum_pg_class_relispartition - 1] = BoolGetDatum(rd_rel->relispartition);
values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
values[Anum_pg_class_relminmxid - 1] = MultiXactIdGetDatum(rd_rel->relminmxid);
if (relacl != (Datum) 0)
@@ -819,6 +823,9 @@ InsertPgClassTuple(Relation pg_class_desc,
else
nulls[Anum_pg_class_reloptions - 1] = true;
+ /* relpartbound is set by updating this tuple, if necessary */
+ nulls[Anum_pg_class_relpartbound - 1] = true;
+
tup = heap_form_tuple(RelationGetDescr(pg_class_desc), values, nulls);
/*
@@ -924,6 +931,9 @@ AddNewRelationTuple(Relation pg_class_desc,
new_rel_reltup->reltype = new_type_oid;
new_rel_reltup->reloftype = reloftype;
+ /* relispartition is always set by updating this tuple later */
+ new_rel_reltup->relispartition = false;
+
new_rel_desc->rd_att->tdtypeid = new_type_oid;
/* Now build and insert the tuple */
@@ -1104,7 +1114,8 @@ heap_create_with_catalog(const char *relname,
if (IsBinaryUpgrade &&
(relkind == RELKIND_RELATION || relkind == RELKIND_SEQUENCE ||
relkind == RELKIND_VIEW || relkind == RELKIND_MATVIEW ||
- relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE))
+ relkind == RELKIND_COMPOSITE_TYPE || relkind == RELKIND_FOREIGN_TABLE ||
+ relkind == RELKIND_PARTITIONED_TABLE))
{
if (!OidIsValid(binary_upgrade_next_heap_pg_class_oid))
ereport(ERROR,
@@ -1138,6 +1149,7 @@ heap_create_with_catalog(const char *relname,
case RELKIND_VIEW:
case RELKIND_MATVIEW:
case RELKIND_FOREIGN_TABLE:
+ case RELKIND_PARTITIONED_TABLE:
relacl = get_user_default_acl(ACL_OBJECT_RELATION, ownerid,
relnamespace);
break;
@@ -1182,7 +1194,8 @@ heap_create_with_catalog(const char *relname,
relkind == RELKIND_VIEW ||
relkind == RELKIND_MATVIEW ||
relkind == RELKIND_FOREIGN_TABLE ||
- relkind == RELKIND_COMPOSITE_TYPE))
+ relkind == RELKIND_COMPOSITE_TYPE ||
+ relkind == RELKIND_PARTITIONED_TABLE))
new_array_oid = AssignTypeArrayOid();
/*
@@ -1349,7 +1362,9 @@ heap_create_with_catalog(const char *relname,
if (relpersistence == RELPERSISTENCE_UNLOGGED)
{
Assert(relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW ||
- relkind == RELKIND_TOASTVALUE);
+ relkind == RELKIND_TOASTVALUE ||
+ relkind == RELKIND_PARTITIONED_TABLE);
+
heap_create_init_fork(new_rel_desc);
}
@@ -1754,12 +1769,29 @@ void
heap_drop_with_catalog(Oid relid)
{
Relation rel;
+ Oid parentOid;
+ Relation parent = NULL;
/*
* Open and lock the relation.
*/
rel = relation_open(relid, AccessExclusiveLock);
+ /*
+ * If the relation is a partition, we must grab exclusive lock on its
+ * parent because we need to update its partition descriptor. We must
+ * take a table lock strong enough to prevent all queries on the parent
+ * from proceeding until we commit and send out a shared-cache-inval
+ * notice that will make them update their partition descriptor.
+	 * Sometimes doing this wastes cycles, especially if the
+ * parent will be dropped as part of the same command anyway.
+ */
+ if (rel->rd_rel->relispartition)
+ {
+ parentOid = get_partition_parent(relid);
+ parent = heap_open(parentOid, AccessExclusiveLock);
+ }
+
/*
* There can no longer be anyone *else* touching the relation, but we
* might still have open queries or cursors, or pending trigger events, in
@@ -1795,6 +1827,12 @@ heap_drop_with_catalog(Oid relid)
heap_close(rel, RowExclusiveLock);
}
+ /*
+ * If a partitioned table, delete the pg_partitioned_table tuple.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ RemovePartitionKeyByRelId(relid);
+
/*
* Schedule unlinking of the relation's physical files at commit.
*/
@@ -1845,6 +1883,12 @@ heap_drop_with_catalog(Oid relid)
* delete relation tuple
*/
DeleteRelationTuple(relid);
+
+ if (parent)
+ {
+ CacheInvalidateRelcache(parent);
+ heap_close(parent, NoLock); /* keep the lock */
+ }
}
@@ -2027,6 +2071,17 @@ StoreRelCheck(Relation rel, char *ccname, Node *expr,
else
attNos = NULL;
+ /*
+ * Partitioned tables do not contain any rows themselves, so a NO INHERIT
+ * constraint makes no sense.
+ */
+ if (is_no_inherit &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot add NO INHERIT constraint to partitioned table \"%s\"",
+ RelationGetRelationName(rel))));
+
/*
* Create the Check Constraint
*/
@@ -2440,8 +2495,11 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr,
* definition) then interpret addition of a local constraint as a
* legal merge. This allows ALTER ADD CONSTRAINT on parent and
* child tables to be given in either order with same end state.
+			 * However, if the relation is a partition, inherited
+			 * constraints are always non-local, including those that
+			 * were merged.
*/
- if (is_local && !con->conislocal)
+ if (is_local && !con->conislocal && !rel->rd_rel->relispartition)
allow_merge = true;
if (!found || !allow_merge)
@@ -2486,10 +2544,24 @@ MergeWithExistingConstraint(Relation rel, char *ccname, Node *expr,
tup = heap_copytuple(tup);
con = (Form_pg_constraint) GETSTRUCT(tup);
- if (is_local)
- con->conislocal = true;
+ /*
+		 * In the case of partitions, an inherited constraint must be
+		 * inherited only once, since a partition cannot have multiple
+		 * parents, and it is never considered local.
+ */
+ if (rel->rd_rel->relispartition)
+ {
+ con->coninhcount = 1;
+ con->conislocal = false;
+ }
else
- con->coninhcount++;
+ {
+ if (is_local)
+ con->conislocal = true;
+ else
+ con->coninhcount++;
+ }
+
if (is_no_inherit)
{
Assert(is_local);
@@ -3013,3 +3085,187 @@ insert_ordered_unique_oid(List *list, Oid datum)
lappend_cell_oid(list, prev, datum);
return list;
}
+
+/*
+ * StorePartitionKey
+ * Store information about the partition key rel into the catalog
+ */
+void
+StorePartitionKey(Relation rel,
+ char strategy,
+ int16 partnatts,
+ AttrNumber *partattrs,
+ List *partexprs,
+ Oid *partopclass,
+ Oid *partcollation)
+{
+ int i;
+ int2vector *partattrs_vec;
+ oidvector *partopclass_vec;
+ oidvector *partcollation_vec;
+ Datum partexprDatum;
+ Relation pg_partitioned_table;
+ HeapTuple tuple;
+ Datum values[Natts_pg_partitioned_table];
+ bool nulls[Natts_pg_partitioned_table];
+ ObjectAddress myself;
+ ObjectAddress referenced;
+
+ Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	/* Copy the partition attribute numbers and opclass OIDs into arrays */
+ partattrs_vec = buildint2vector(partattrs, partnatts);
+ partopclass_vec = buildoidvector(partopclass, partnatts);
+ partcollation_vec = buildoidvector(partcollation, partnatts);
+
+ /* Convert the expressions (if any) to a text datum */
+ if (partexprs)
+ {
+ char *exprString;
+
+ exprString = nodeToString(partexprs);
+ partexprDatum = CStringGetTextDatum(exprString);
+ pfree(exprString);
+ }
+ else
+ partexprDatum = (Datum) 0;
+
+ pg_partitioned_table = heap_open(PartitionedRelationId, RowExclusiveLock);
+
+ MemSet(nulls, false, sizeof(nulls));
+
+ /* Only this can ever be NULL */
+ if (!partexprDatum)
+ nulls[Anum_pg_partitioned_table_partexprs - 1] = true;
+
+ values[Anum_pg_partitioned_table_partrelid - 1] = ObjectIdGetDatum(RelationGetRelid(rel));
+ values[Anum_pg_partitioned_table_partstrat - 1] = CharGetDatum(strategy);
+ values[Anum_pg_partitioned_table_partnatts - 1] = Int16GetDatum(partnatts);
+ values[Anum_pg_partitioned_table_partattrs - 1] = PointerGetDatum(partattrs_vec);
+ values[Anum_pg_partitioned_table_partclass - 1] = PointerGetDatum(partopclass_vec);
+ values[Anum_pg_partitioned_table_partcollation - 1] = PointerGetDatum(partcollation_vec);
+ values[Anum_pg_partitioned_table_partexprs - 1] = partexprDatum;
+
+ tuple = heap_form_tuple(RelationGetDescr(pg_partitioned_table), values, nulls);
+
+ simple_heap_insert(pg_partitioned_table, tuple);
+
+ /* Update the indexes on pg_partitioned_table */
+ CatalogUpdateIndexes(pg_partitioned_table, tuple);
+ heap_close(pg_partitioned_table, RowExclusiveLock);
+
+ /* Mark this relation as dependent on a few things as follows */
+ myself.classId = RelationRelationId;
+	myself.objectId = RelationGetRelid(rel);
+ myself.objectSubId = 0;
+
+ /* Operator class and collation per key column */
+ for (i = 0; i < partnatts; i++)
+ {
+ referenced.classId = OperatorClassRelationId;
+ referenced.objectId = partopclass[i];
+ referenced.objectSubId = 0;
+
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+
+ referenced.classId = CollationRelationId;
+ referenced.objectId = partcollation[i];
+ referenced.objectSubId = 0;
+
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+ }
+
+ /*
+ * Anything mentioned in the expressions. We must ignore the column
+ * references, which will depend on the table itself; there is no
+ * separate partition key object.
+ */
+ if (partexprs)
+ recordDependencyOnSingleRelExpr(&myself,
+ (Node *) partexprs,
+ RelationGetRelid(rel),
+ DEPENDENCY_NORMAL,
+ DEPENDENCY_AUTO, true);
+
+ /*
+ * We must invalidate the relcache so that the next
+	 * CommandCounterIncrement() will cause it to be rebuilt using the
+	 * information in the just-created catalog entry.
+ */
+ CacheInvalidateRelcache(rel);
+}
+
+/*
+ * RemovePartitionKeyByRelId
+ * Remove pg_partitioned_table entry for a relation
+ */
+void
+RemovePartitionKeyByRelId(Oid relid)
+{
+ Relation rel;
+ HeapTuple tuple;
+
+ rel = heap_open(PartitionedRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for partition key of relation %u",
+ relid);
+
+ simple_heap_delete(rel, &tuple->t_self);
+
+ ReleaseSysCache(tuple);
+ heap_close(rel, RowExclusiveLock);
+}
+
+/*
+ * StorePartitionBound
+ * Update pg_class tuple of rel to store the partition bound and set
+ * relispartition to true
+ */
+void
+StorePartitionBound(Relation rel, Node *bound)
+{
+ Relation classRel;
+ HeapTuple tuple,
+ newtuple;
+ Datum new_val[Natts_pg_class];
+ bool new_null[Natts_pg_class],
+ new_repl[Natts_pg_class];
+
+ /* Update pg_class tuple */
+ classRel = heap_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(rel)));
+#ifdef USE_ASSERT_CHECKING
+ {
+ Form_pg_class classForm;
+ bool isnull;
+
+ classForm = (Form_pg_class) GETSTRUCT(tuple);
+ Assert(!classForm->relispartition);
+ (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound,
+ &isnull);
+ Assert(isnull);
+ }
+#endif
+
+ /* Fill in relpartbound value */
+ memset(new_val, 0, sizeof(new_val));
+ memset(new_null, false, sizeof(new_null));
+ memset(new_repl, false, sizeof(new_repl));
+ new_val[Anum_pg_class_relpartbound - 1] = CStringGetTextDatum(nodeToString(bound));
+ new_null[Anum_pg_class_relpartbound - 1] = false;
+ new_repl[Anum_pg_class_relpartbound - 1] = true;
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+ new_val, new_null, new_repl);
+ /* Also set the flag */
+ ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = true;
+ simple_heap_update(classRel, &newtuple->t_self, newtuple);
+ CatalogUpdateIndexes(classRel, newtuple);
+ heap_freetuple(newtuple);
+ heap_close(classRel, RowExclusiveLock);
+}
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 08b646d8f3..08b0989112 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1043,7 +1043,7 @@ index_create(Relation heapRelation,
(Node *) indexInfo->ii_Expressions,
heapRelationId,
DEPENDENCY_NORMAL,
- DEPENDENCY_AUTO);
+ DEPENDENCY_AUTO, false);
}
/* Store dependencies on anything mentioned in predicate */
@@ -1053,7 +1053,7 @@ index_create(Relation heapRelation,
(Node *) indexInfo->ii_Predicate,
heapRelationId,
DEPENDENCY_NORMAL,
- DEPENDENCY_AUTO);
+ DEPENDENCY_AUTO, false);
}
}
else
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index d531d17cdb..bb4b080b00 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -1204,7 +1204,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *objname,
RelationGetRelationName(relation))));
break;
case OBJECT_TABLE:
- if (relation->rd_rel->relkind != RELKIND_RELATION)
+ if (relation->rd_rel->relkind != RELKIND_RELATION &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
@@ -3244,6 +3245,7 @@ getRelationDescription(StringInfo buffer, Oid relid)
switch (relForm->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
appendStringInfo(buffer, _("table %s"),
relname);
break;
@@ -3701,6 +3703,7 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId)
switch (relForm->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
appendStringInfoString(buffer, "table");
break;
case RELKIND_INDEX:
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
new file mode 100644
index 0000000000..6dab45f0ed
--- /dev/null
+++ b/src/backend/catalog/partition.c
@@ -0,0 +1,1917 @@
+/*-------------------------------------------------------------------------
+ *
+ * partition.c
+ * Partitioning related data structures and functions.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/catalog/partition.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "access/sysattr.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/objectaddress.h"
+#include "catalog/partition.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/parsenodes.h"
+#include "optimizer/clauses.h"
+#include "optimizer/planmain.h"
+#include "optimizer/var.h"
+#include "rewrite/rewriteManip.h"
+#include "storage/lmgr.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/memutils.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/syscache.h"
+
+/*
+ * Information about bounds of a partitioned relation
+ *
+ * A list partition datum that is known to be NULL is never put into the
+ * datums array. Instead, it is tracked using has_null and null_index fields.
+ *
+ * In the case of range partitioning, ndatums will typically be far less than
+ * 2 * nparts, because a partition's upper bound and the next partition's lower
+ * bound are the same in most common cases, and we only store one of them.
+ *
+ * In the case of list partitioning, the indexes array stores one entry for
+ * every datum, which is the index of the partition that accepts a given datum.
+ * In case of range partitioning, it stores one entry per distinct range
+ * datum, which is the index of the partition for which a given datum
+ * is an upper bound.
+ */
+
+/* Ternary value to represent what's contained in a range bound datum */
+typedef enum RangeDatumContent
+{
+ RANGE_DATUM_FINITE = 0, /* actual datum stored elsewhere */
+ RANGE_DATUM_NEG_INF, /* negative infinity */
+ RANGE_DATUM_POS_INF /* positive infinity */
+} RangeDatumContent;
+
+typedef struct PartitionBoundInfoData
+{
+ char strategy; /* list or range bounds? */
+	int			ndatums;		/* Length of the following datums array */
+ Datum **datums; /* Array of datum-tuples with key->partnatts
+ * datums each */
+	RangeDatumContent **content;	/* what's contained in each range bound datum?
+ * (see the above enum); NULL for list
+ * partitioned tables */
+ int *indexes; /* Partition indexes; one entry per member of
+ * the datums array (plus one if range
+ * partitioned table) */
+ bool has_null; /* Is there a null-accepting partition? false
+ * for range partitioned tables */
+ int null_index; /* Index of the null-accepting partition; -1
+ * for range partitioned tables */
+} PartitionBoundInfoData;
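
As a concrete illustration of the indexes convention described above, here is a minimal standalone sketch (toy integers and an invented route() helper, not the server's catalogs or types). For list partitioning, all non-null accepted values sit in one sorted array, each entry recording which partition accepts it, while NULL is handled through has_null/null_index:

#include <stdbool.h>
#include <stdio.h>

/* Toy layout: partition 0 accepts {1, 4}; partition 1 accepts {2, 8}
 * and NULL.  Non-null datums are kept sorted; indexes[] is parallel. */
static const int  datums[]   = {1, 2, 4, 8};
static const int  indexes[]  = {0, 1, 0, 1};
static const bool has_null   = true;
static const int  null_index = 1;

/* Return the accepting partition, or -1 if there is none. */
static int
route(bool isnull, int value)
{
	int		lo = 0,
			hi = 3;

	if (isnull)
		return has_null ? null_index : -1;

	while (lo <= hi)			/* plain binary search */
	{
		int		mid = (lo + hi) / 2;

		if (datums[mid] == value)
			return indexes[mid];
		if (datums[mid] < value)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return -1;
}

int
main(void)
{
	printf("%d %d %d %d\n",
		   route(false, 4),		/* 0 */
		   route(false, 8),		/* 1 */
		   route(false, 7),		/* -1: no partition */
		   route(true, 0));		/* 1: null_index */
	return 0;
}

For range partitioning, the same array holds one entry per distinct bound, with one extra indexes[] slot, as the sketches further below show.
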
+
+/*
+ * When qsort'ing partition bounds after reading from the catalog, each bound
+ * is represented with one of the following structs.
+ */
+
+/* One value coming from some (index'th) list partition */
+typedef struct PartitionListValue
+{
+ int index;
+ Datum value;
+} PartitionListValue;
+
+/* One bound of a range partition */
+typedef struct PartitionRangeBound
+{
+ int index;
+ Datum *datums; /* range bound datums */
+ RangeDatumContent *content; /* what's contained in each datum? */
+ bool lower; /* this is the lower (vs upper) bound */
+} PartitionRangeBound;
+
+static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
+ void *arg);
+static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
+ void *arg);
+
+static List *get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec);
+static List *get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec);
+static Oid get_partition_operator(PartitionKey key, int col,
+ StrategyNumber strategy, bool *need_relabel);
+static List *generate_partition_qual(Relation rel, bool recurse);
+
+static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
+ List *datums, bool lower);
+static int32 partition_rbound_cmp(PartitionKey key,
+ Datum *datums1, RangeDatumContent *content1, bool lower1,
+ PartitionRangeBound *b2);
+static int32 partition_rbound_datum_cmp(PartitionKey key,
+ Datum *rb_datums, RangeDatumContent *rb_content,
+ Datum *tuple_datums);
+
+static int32 partition_bound_cmp(PartitionKey key,
+ PartitionBoundInfo boundinfo,
+ int offset, void *probe, bool probe_is_bound);
+static int partition_bound_bsearch(PartitionKey key,
+ PartitionBoundInfo boundinfo,
+ void *probe, bool probe_is_bound, bool *is_equal);
+
+/* Support get_partition_for_tuple() */
+static void FormPartitionKeyDatum(PartitionDispatch pd,
+ TupleTableSlot *slot,
+ EState *estate,
+ Datum *values,
+ bool *isnull);
+
+/*
+ * RelationBuildPartitionDesc
+ * Form rel's partition descriptor
+ *
+ * Not flushed from the cache by RelationClearRelation() unless changed because
+ * of addition or removal of partition.
+ */
+void
+RelationBuildPartitionDesc(Relation rel)
+{
+ List *inhoids,
+ *partoids;
+ Oid *oids = NULL;
+ List *boundspecs = NIL;
+ ListCell *cell;
+ int i,
+ nparts;
+ PartitionKey key = RelationGetPartitionKey(rel);
+ PartitionDesc result;
+ MemoryContext oldcxt;
+
+ int ndatums = 0;
+
+ /* List partitioning specific */
+ PartitionListValue **all_values = NULL;
+ bool found_null = false;
+ int null_index = -1;
+
+ /* Range partitioning specific */
+ PartitionRangeBound **rbounds = NULL;
+
+ /*
+ * The following could happen in situations where rel has a pg_class entry
+ * but not the pg_partitioned_table entry yet.
+ */
+ if (key == NULL)
+ return;
+
+ /* Get partition oids from pg_inherits */
+ inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+
+ /* Collect bound spec nodes in a list */
+ i = 0;
+ partoids = NIL;
+ foreach(cell, inhoids)
+ {
+ Oid inhrelid = lfirst_oid(cell);
+ HeapTuple tuple;
+ Datum datum;
+ bool isnull;
+ Node *boundspec;
+
+		tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
+
+ /*
+ * It is possible that the pg_class tuple of a partition has not been
+ * updated yet to set its relpartbound field. The only case where
+ * this happens is when we open the parent relation to check using its
+ * partition descriptor that a new partition's bound does not overlap
+ * some existing partition.
+ */
+ if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
+ {
+ ReleaseSysCache(tuple);
+ continue;
+ }
+
+ datum = SysCacheGetAttr(RELOID, tuple,
+ Anum_pg_class_relpartbound,
+ &isnull);
+ Assert(!isnull);
+ boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
+ boundspecs = lappend(boundspecs, boundspec);
+ partoids = lappend_oid(partoids, inhrelid);
+ ReleaseSysCache(tuple);
+ }
+
+ nparts = list_length(partoids);
+
+ if (nparts > 0)
+ {
+ oids = (Oid *) palloc(nparts * sizeof(Oid));
+ i = 0;
+ foreach(cell, partoids)
+ oids[i++] = lfirst_oid(cell);
+
+ /* Convert from node to the internal representation */
+ if (key->strategy == PARTITION_STRATEGY_LIST)
+ {
+ List *non_null_values = NIL;
+
+ /*
+ * Create a unified list of non-null values across all partitions.
+ */
+ i = 0;
+ found_null = false;
+ null_index = -1;
+ foreach(cell, boundspecs)
+ {
+ ListCell *c;
+ PartitionBoundSpec *spec = lfirst(cell);
+
+ if (spec->strategy != PARTITION_STRATEGY_LIST)
+ elog(ERROR, "invalid strategy in partition bound spec");
+
+ foreach(c, spec->listdatums)
+ {
+ Const *val = lfirst(c);
+ PartitionListValue *list_value = NULL;
+
+ if (!val->constisnull)
+ {
+ list_value = (PartitionListValue *)
+ palloc0(sizeof(PartitionListValue));
+ list_value->index = i;
+ list_value->value = val->constvalue;
+ }
+ else
+ {
+ /*
+						 * Never put a null into the values array; instead,
+						 * set a flag for the code further below that
+						 * constructs the actual relcache struct.
+ */
+ if (found_null)
+ elog(ERROR, "found null more than once");
+ found_null = true;
+ null_index = i;
+ }
+
+ if (list_value)
+ non_null_values = lappend(non_null_values,
+ list_value);
+ }
+
+ i++;
+ }
+
+ ndatums = list_length(non_null_values);
+
+ /*
+ * Collect all list values in one array. Alongside the value, we
+			 * also save the index of the partition the value comes from.
+ */
+ all_values = (PartitionListValue **) palloc(ndatums *
+ sizeof(PartitionListValue *));
+ i = 0;
+ foreach(cell, non_null_values)
+ {
+ PartitionListValue *src = lfirst(cell);
+
+ all_values[i] = (PartitionListValue *)
+ palloc(sizeof(PartitionListValue));
+ all_values[i]->value = src->value;
+ all_values[i]->index = src->index;
+ i++;
+ }
+
+ qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
+ qsort_partition_list_value_cmp, (void *) key);
+ }
+ else if (key->strategy == PARTITION_STRATEGY_RANGE)
+ {
+ int j,
+ k;
+ PartitionRangeBound **all_bounds,
+ *prev;
+ bool *distinct_indexes;
+
+ all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
+ sizeof(PartitionRangeBound *));
+ distinct_indexes = (bool *) palloc(2 * nparts * sizeof(bool));
+
+ /*
+ * Create a unified list of range bounds across all the
+ * partitions.
+ */
+ i = j = 0;
+ foreach(cell, boundspecs)
+ {
+ PartitionBoundSpec *spec = lfirst(cell);
+ PartitionRangeBound *lower,
+ *upper;
+
+ if (spec->strategy != PARTITION_STRATEGY_RANGE)
+ elog(ERROR, "invalid strategy in partition bound spec");
+
+ lower = make_one_range_bound(key, i, spec->lowerdatums,
+ true);
+ upper = make_one_range_bound(key, i, spec->upperdatums,
+ false);
+ all_bounds[j] = lower;
+ all_bounds[j + 1] = upper;
+ j += 2;
+ i++;
+ }
+ Assert(j == 2 * nparts);
+
+ /* Sort all the bounds in ascending order */
+ qsort_arg(all_bounds, 2 * nparts,
+ sizeof(PartitionRangeBound *),
+ qsort_partition_rbound_cmp,
+ (void *) key);
+
+ /*
+ * Count the number of distinct bounds to allocate an array of
+ * that size.
+ */
+ ndatums = 0;
+ prev = NULL;
+ for (i = 0; i < 2 * nparts; i++)
+ {
+ PartitionRangeBound *cur = all_bounds[i];
+ bool is_distinct = false;
+ int j;
+
+				/* Is the current bound distinct from the previous one? */
+ for (j = 0; j < key->partnatts; j++)
+ {
+ Datum cmpval;
+
+ if (prev == NULL)
+ {
+ is_distinct = true;
+ break;
+ }
+
+ /*
+					 * If either of them has an infinite element, we can't
+					 * equate them.  Even when both are infinite, they'd have
+					 * opposite signs, because only one of cur and prev is a
+					 * lower bound.
+ */
+ if (cur->content[j] != RANGE_DATUM_FINITE ||
+ prev->content[j] != RANGE_DATUM_FINITE)
+ {
+ is_distinct = true;
+ break;
+ }
+ cmpval = FunctionCall2Coll(&key->partsupfunc[j],
+ key->partcollation[j],
+ cur->datums[j],
+ prev->datums[j]);
+ if (DatumGetInt32(cmpval) != 0)
+ {
+ is_distinct = true;
+ break;
+ }
+ }
+
+ /*
+ * Count the current bound if it is distinct from the previous
+				 * one.  Also, record whether index i contains a distinct
+				 * bound that we'd like to put in the relcache array.
+ */
+ if (is_distinct)
+ {
+ distinct_indexes[i] = true;
+ ndatums++;
+ }
+ else
+ distinct_indexes[i] = false;
+
+ prev = cur;
+ }
+
+ /*
+ * Finally save them in an array from where they will be copied
+ * into the relcache.
+ */
+ rbounds = (PartitionRangeBound **) palloc(ndatums *
+ sizeof(PartitionRangeBound *));
+ k = 0;
+ for (i = 0; i < 2 * nparts; i++)
+ {
+ if (distinct_indexes[i])
+ rbounds[k++] = all_bounds[i];
+ }
+ Assert(k == ndatums);
+ }
+ else
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+
+ /* Now build the actual relcache partition descriptor */
+ rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
+ RelationGetRelationName(rel),
+ ALLOCSET_DEFAULT_SIZES);
+ oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
+
+ result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
+ result->nparts = nparts;
+ if (nparts > 0)
+ {
+ PartitionBoundInfo boundinfo;
+ int *mapping;
+ int next_index = 0;
+
+ result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
+
+ boundinfo = (PartitionBoundInfoData *)
+ palloc0(sizeof(PartitionBoundInfoData));
+ boundinfo->strategy = key->strategy;
+ boundinfo->ndatums = ndatums;
+ boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+
+ /* Initialize mapping array with invalid values */
+ mapping = (int *) palloc(sizeof(int) * nparts);
+ for (i = 0; i < nparts; i++)
+ mapping[i] = -1;
+
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ {
+ boundinfo->has_null = found_null;
+ boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+ /*
+ * Copy values. Indexes of individual values are mapped
+					 * to canonical indexes so that they match for any two
+					 * list partitioned tables with the same number of
+					 * partitions and the same lists per partition.  One way
+					 * to canonicalize is
+ * to assign the index in all_values[] of the smallest
+ * value of each partition, as the index of all of the
+ * partition's values.
+ */
+ for (i = 0; i < ndatums; i++)
+ {
+ boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
+ boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
+ key->parttypbyval[0],
+ key->parttyplen[0]);
+
+ /* If the old index has no mapping, assign one */
+ if (mapping[all_values[i]->index] == -1)
+ mapping[all_values[i]->index] = next_index++;
+
+ boundinfo->indexes[i] = mapping[all_values[i]->index];
+ }
+
+ /*
+					 * If the null-accepting partition has no mapped index
+					 * yet, assign one.  This can happen if the partition
+					 * accepts only null and hence was not covered by the
+					 * above loop, which handled only non-null values.
+ */
+ if (found_null)
+ {
+ Assert(null_index >= 0);
+ if (mapping[null_index] == -1)
+ mapping[null_index] = next_index++;
+ }
+
+					/* All partitions must now have a valid mapping */
+ Assert(next_index == nparts);
+
+ if (found_null)
+ boundinfo->null_index = mapping[null_index];
+ else
+ boundinfo->null_index = -1;
+ break;
+ }
+
+ case PARTITION_STRATEGY_RANGE:
+ {
+ boundinfo->content = (RangeDatumContent **) palloc(ndatums *
+ sizeof(RangeDatumContent *));
+ boundinfo->indexes = (int *) palloc((ndatums + 1) *
+ sizeof(int));
+
+ for (i = 0; i < ndatums; i++)
+ {
+ int j;
+
+ boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
+ sizeof(Datum));
+ boundinfo->content[i] = (RangeDatumContent *)
+ palloc(key->partnatts *
+ sizeof(RangeDatumContent));
+ for (j = 0; j < key->partnatts; j++)
+ {
+ if (rbounds[i]->content[j] == RANGE_DATUM_FINITE)
+ boundinfo->datums[i][j] =
+ datumCopy(rbounds[i]->datums[j],
+ key->parttypbyval[j],
+ key->parttyplen[j]);
+ /* Remember, we are storing the tri-state value. */
+ boundinfo->content[i][j] = rbounds[i]->content[j];
+ }
+
+ /*
+ * There is no mapping for invalid indexes.
+ *
+ * Any lower bounds in the rbounds array have invalid
+ * indexes assigned, because the values between the
+ * previous bound (if there is one) and this (lower)
+ * bound are not part of the range of any existing
+ * partition.
+ */
+ if (rbounds[i]->lower)
+ boundinfo->indexes[i] = -1;
+ else
+ {
+ int orig_index = rbounds[i]->index;
+
+					/* If the old index has no mapping, assign one */
+ if (mapping[orig_index] == -1)
+ mapping[orig_index] = next_index++;
+
+ boundinfo->indexes[i] = mapping[orig_index];
+ }
+ }
+ boundinfo->indexes[i] = -1;
+ break;
+ }
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+
+ result->boundinfo = boundinfo;
+
+ /*
+ * Now assign OIDs from the original array into mapped indexes of the
+ * result array. Order of OIDs in the former is defined by the
+		 * catalog scan that retrieved them, whereas that in the latter is
+ * defined by canonicalized representation of the list values or the
+ * range bounds.
+ */
+ for (i = 0; i < nparts; i++)
+ result->oids[mapping[i]] = oids[i];
+ pfree(mapping);
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+ rel->rd_partdesc = result;
+}
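
The mapping[] relabeling at the end of this function is what makes descriptors canonical: partition indexes are renumbered in order of first appearance in the sorted bound array, so the result is independent of the pg_inherits scan order. A minimal standalone sketch of just that step (sorted_owner is an invented stand-in for all_values[i]->index):

#include <stdio.h>

int
main(void)
{
	/* Owner partition of each datum, in sorted-datum order. */
	int		sorted_owner[] = {2, 2, 0, 1, 0};
	int		mapping[3] = {-1, -1, -1};
	int		next_index = 0;

	for (int i = 0; i < 5; i++)
	{
		int		orig = sorted_owner[i];

		/* If the old index has no mapping yet, assign one. */
		if (mapping[orig] == -1)
			mapping[orig] = next_index++;
		printf("datum %d -> canonical partition %d\n", i, mapping[orig]);
	}
	/* OIDs are then stored as oids[mapping[i]] = original_oids[i]. */
	return 0;
}
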
+
+/*
+ * Are two partition bound collections logically equal?
+ *
+ * Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
+ * This is also useful when b1 and b2 are bound collections of two separate
+ * relations, respectively, because PartitionBoundInfo is a canonical
+ * representation of partition bounds.
+ */
+bool
+partition_bounds_equal(PartitionKey key,
+ PartitionBoundInfo b1, PartitionBoundInfo b2)
+{
+ int i;
+
+ if (b1->strategy != b2->strategy)
+ return false;
+
+ if (b1->ndatums != b2->ndatums)
+ return false;
+
+ if (b1->has_null != b2->has_null)
+ return false;
+
+ if (b1->null_index != b2->null_index)
+ return false;
+
+ for (i = 0; i < b1->ndatums; i++)
+ {
+ int j;
+
+ for (j = 0; j < key->partnatts; j++)
+ {
+ int32 cmpval;
+
+ cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[j],
+ key->partcollation[j],
+ b1->datums[i][j],
+ b2->datums[i][j]));
+ if (cmpval != 0)
+ return false;
+
+ /* Range partitions can have infinite datums */
+ if (b1->content != NULL && b1->content[i][j] != b2->content[i][j])
+ return false;
+ }
+
+ if (b1->indexes[i] != b2->indexes[i])
+ return false;
+ }
+
+ /* There are ndatums+1 indexes in case of range partitions */
+ if (key->strategy == PARTITION_STRATEGY_RANGE &&
+ b1->indexes[i] != b2->indexes[i])
+ return false;
+
+ return true;
+}
+
+/*
+ * check_new_partition_bound
+ *
+ * Checks if the new partition's bound overlaps any of the existing partitions
+ * of parent. Also performs additional checks as necessary per strategy.
+ */
+void
+check_new_partition_bound(char *relname, Relation parent, Node *bound)
+{
+ PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
+ PartitionKey key = RelationGetPartitionKey(parent);
+ PartitionDesc partdesc = RelationGetPartitionDesc(parent);
+ ParseState *pstate = make_parsestate(NULL);
+ int with = -1;
+ bool overlap = false;
+
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ {
+ Assert(spec->strategy == PARTITION_STRATEGY_LIST);
+
+ if (partdesc->nparts > 0)
+ {
+ PartitionBoundInfo boundinfo = partdesc->boundinfo;
+ ListCell *cell;
+
+ Assert(boundinfo &&
+ boundinfo->strategy == PARTITION_STRATEGY_LIST &&
+ (boundinfo->ndatums > 0 || boundinfo->has_null));
+
+ foreach(cell, spec->listdatums)
+ {
+ Const *val = lfirst(cell);
+
+ if (!val->constisnull)
+ {
+ int offset;
+ bool equal;
+
+ offset = partition_bound_bsearch(key, boundinfo,
+ &val->constvalue,
+ true, &equal);
+ if (offset >= 0 && equal)
+ {
+ overlap = true;
+ with = boundinfo->indexes[offset];
+ break;
+ }
+ }
+ else if (boundinfo->has_null)
+ {
+ overlap = true;
+ with = boundinfo->null_index;
+ break;
+ }
+ }
+ }
+
+ break;
+ }
+
+ case PARTITION_STRATEGY_RANGE:
+ {
+ PartitionRangeBound *lower,
+ *upper;
+
+ Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
+ lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
+ upper = make_one_range_bound(key, -1, spec->upperdatums, false);
+
+ /*
+ * First check if the resulting range would be empty with
+ * specified lower and upper bounds
+ */
+ if (partition_rbound_cmp(key, lower->datums, lower->content, true,
+ upper) >= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot create range partition with empty range"),
+ parser_errposition(pstate, spec->location)));
+
+ if (partdesc->nparts > 0)
+ {
+ PartitionBoundInfo boundinfo = partdesc->boundinfo;
+ int off1,
+ off2;
+ bool equal = false;
+
+ Assert(boundinfo && boundinfo->ndatums > 0 &&
+ boundinfo->strategy == PARTITION_STRATEGY_RANGE);
+
+ /*
+ * Find the greatest index of a range bound that is less
+					 * than or equal to the new lower bound.
+ */
+ off1 = partition_bound_bsearch(key, boundinfo, lower, true,
+ &equal);
+
+ /*
+ * If equal has been set to true, that means the new lower
+					 * bound is equal to the bound at off1, which clearly
+					 * means an overlap with the partition at index off1+1.
+ *
+ * Otherwise, check if there is a "gap" that could be
+ * occupied by the new partition. In case of a gap, the
+ * new upper bound should not cross past the upper
+ * boundary of the gap, that is, off2 == off1 should be
+ * true.
+ */
+ if (!equal && boundinfo->indexes[off1 + 1] < 0)
+ {
+ off2 = partition_bound_bsearch(key, boundinfo, upper,
+ true, &equal);
+
+ if (equal || off1 != off2)
+ {
+ overlap = true;
+ with = boundinfo->indexes[off2 + 1];
+ }
+ }
+ else
+ {
+ overlap = true;
+ with = boundinfo->indexes[off1 + 1];
+ }
+ }
+
+ break;
+ }
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+
+ if (overlap)
+ {
+ Assert(with >= 0);
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("partition \"%s\" would overlap partition \"%s\"",
+ relname, get_rel_name(partdesc->oids[with])),
+ parser_errposition(pstate, spec->location)));
+ }
+}
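
The range branch above is easiest to follow with toy integer bounds. This standalone sketch (an invented Bound type and helpers standing in for PartitionRangeBound and partition_bound_bsearch) reproduces both tests: the new lower bound must land in an unclaimed interval with no exact match, and the new upper bound must not cross past the end of that gap (off2 == off1):

#include <stdbool.h>
#include <stdio.h>

typedef struct Bound
{
	int		value;
	bool	lower;				/* inclusive lower vs exclusive upper */
} Bound;

/* Existing partitions [0,10) and [20,30); bounds kept sorted. */
static const Bound bounds[] = {{0, true}, {10, false}, {20, true}, {30, false}};
static const int indexes[] = {-1, 0, -1, 1, -1};	/* nbounds + 1 entries */

static int
bound_cmp(const Bound *a, const Bound *b)
{
	if (a->value != b->value)
		return a->value < b->value ? -1 : 1;
	if (a->lower == b->lower)
		return 0;
	return a->lower ? 1 : -1;	/* lower bound > equal upper bound */
}

/* Greatest offset whose bound is <= probe, or -1; sets *equal on a match. */
static int
bsearch_le(Bound probe, bool *equal)
{
	int		lo = 0,
			hi = 3,
			off = -1;

	*equal = false;
	while (lo <= hi)
	{
		int		mid = (lo + hi) / 2;
		int		c = bound_cmp(&bounds[mid], &probe);

		if (c <= 0)
		{
			off = mid;
			lo = mid + 1;
			if (c == 0)
				*equal = true;
		}
		else
			hi = mid - 1;
	}
	return off;
}

static bool
overlaps(int lo_val, int up_val)
{
	Bound	lower = {lo_val, true};
	Bound	upper = {up_val, false};
	bool	equal;
	int		off1 = bsearch_le(lower, &equal);

	/* Lower bound matches a bound exactly, or lands inside a partition. */
	if (equal || indexes[off1 + 1] >= 0)
		return true;
	/* It lands in a gap; the upper bound must not cross the gap's end. */
	return bsearch_le(upper, &equal) != off1 || equal;
}

int
main(void)
{
	printf("[10,20): %d\n", overlaps(10, 20));	/* 0: fills the gap */
	printf("[12,25): %d\n", overlaps(12, 25));	/* 1: crosses into [20,30) */
	printf("[-5,0):  %d\n", overlaps(-5, 0));	/* 0: below everything */
	return 0;
}
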
+
+/*
+ * get_partition_parent
+ *
+ * Returns inheritance parent of a partition by scanning pg_inherits
+ *
+ * Note: Because this function assumes that the relation whose OID is passed
+ * as an argument will have precisely one parent, it should only be called
+ * when it is known that the relation is a partition.
+ */
+Oid
+get_partition_parent(Oid relid)
+{
+ Form_pg_inherits form;
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key[2];
+ HeapTuple tuple;
+ Oid result;
+
+ catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
+
+ ScanKeyInit(&key[0],
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+ ScanKeyInit(&key[1],
+ Anum_pg_inherits_inhseqno,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(1));
+
+ scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
+ NULL, 2, key);
+
+ tuple = systable_getnext(scan);
+ Assert(HeapTupleIsValid(tuple));
+
+ form = (Form_pg_inherits) GETSTRUCT(tuple);
+ result = form->inhparent;
+
+ systable_endscan(scan);
+ heap_close(catalogRelation, AccessShareLock);
+
+ return result;
+}
+
+/*
+ * get_qual_from_partbound
+ * Given a parser node for partition bound, return the list of executable
+ * expressions as partition constraint
+ */
+List *
+get_qual_from_partbound(Relation rel, Relation parent, Node *bound)
+{
+ PartitionBoundSpec *spec = (PartitionBoundSpec *) bound;
+ PartitionKey key = RelationGetPartitionKey(parent);
+ List *my_qual = NIL;
+ TupleDesc parent_tupdesc = RelationGetDescr(parent);
+ AttrNumber parent_attno;
+ AttrNumber *partition_attnos;
+ bool found_whole_row;
+
+ Assert(key != NULL);
+
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ Assert(spec->strategy == PARTITION_STRATEGY_LIST);
+ my_qual = get_qual_for_list(key, spec);
+ break;
+
+ case PARTITION_STRATEGY_RANGE:
+ Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
+ my_qual = get_qual_for_range(key, spec);
+ break;
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+
+ /*
+	 * Translate vars in the generated expression to have correct attnos.
+	 * Note that the vars in my_qual bear attnos dictated by key, which
+	 * carries the physical attnos of the parent.  We must allow for the
+	 * case where the physical attnos of a partition differ from the
+	 * parent's.
+ */
+ partition_attnos = (AttrNumber *)
+ palloc0(parent_tupdesc->natts * sizeof(AttrNumber));
+ for (parent_attno = 1; parent_attno <= parent_tupdesc->natts;
+ parent_attno++)
+ {
+ Form_pg_attribute attribute = parent_tupdesc->attrs[parent_attno - 1];
+ char *attname = NameStr(attribute->attname);
+ AttrNumber partition_attno;
+
+ if (attribute->attisdropped)
+ continue;
+
+ partition_attno = get_attnum(RelationGetRelid(rel), attname);
+ partition_attnos[parent_attno - 1] = partition_attno;
+ }
+
+ my_qual = (List *) map_variable_attnos((Node *) my_qual,
+ 1, 0,
+ partition_attnos,
+ parent_tupdesc->natts,
+ &found_whole_row);
+ /* there can never be a whole-row reference here */
+ if (found_whole_row)
+ elog(ERROR, "unexpected whole-row reference found in partition key");
+
+ return my_qual;
+}
+
+/*
+ * RelationGetPartitionQual
+ *
+ * Returns a list of partition quals
+ */
+List *
+RelationGetPartitionQual(Relation rel, bool recurse)
+{
+ /* Quick exit */
+ if (!rel->rd_rel->relispartition)
+ return NIL;
+
+ return generate_partition_qual(rel, recurse);
+}
+
+/* Turn an array of OIDs with N elements into a list */
+#define OID_ARRAY_TO_LIST(arr, N, list) \
+ do\
+ {\
+ int i;\
+ for (i = 0; i < (N); i++)\
+ (list) = lappend_oid((list), (arr)[i]);\
+ } while(0)
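
The do { ... } while(0) wrapper on OID_ARRAY_TO_LIST is the standard idiom for making a multi-statement macro behave as a single statement; without it, an unbraced if/else around the macro invocation would either guard only the first statement or fail to parse. A self-contained illustration with a hypothetical SWAP macro:

#include <stdio.h>

/* Wrapped in do/while(0), the macro expands to exactly one statement,
 * so a trailing semicolon is legal and an else still binds correctly. */
#define SWAP(a, b) \
	do { int tmp_ = (a); (a) = (b); (b) = tmp_; } while (0)

int
main(void)
{
	int		x = 1,
			y = 2;

	if (x < y)
		SWAP(x, y);				/* single statement under the if */
	else
		printf("already ordered\n");

	printf("x=%d y=%d\n", x, y);	/* x=2 y=1 */
	return 0;
}
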
+
+/*
+ * RelationGetPartitionDispatchInfo
+ * Returns information necessary to route tuples down a partition tree
+ *
+ * All the partitions will be locked with lockmode, unless it is NoLock.
+ * A list of the OIDs of all the leaf partitions of rel is returned in
+ * *leaf_part_oids.
+ */
+PartitionDispatch *
+RelationGetPartitionDispatchInfo(Relation rel, int lockmode,
+ int *num_parted, List **leaf_part_oids)
+{
+ PartitionDesc rootpartdesc = RelationGetPartitionDesc(rel);
+ PartitionDispatchData **pd;
+ List *all_parts = NIL,
+ *parted_rels;
+ ListCell *lc;
+ int i,
+ k;
+
+ /*
+ * Lock partitions and make a list of the partitioned ones to prepare
+ * their PartitionDispatch objects below.
+ *
+ * Cannot use find_all_inheritors() here, because then the order of OIDs
+	 * in the parted_rels list would be unknown, which does not help, because
+	 * we assign indexes within individual PartitionDispatch in an order that
+ * is predetermined (determined by the order of OIDs in individual
+ * partition descriptors).
+ */
+ *num_parted = 1;
+ parted_rels = list_make1(rel);
+ OID_ARRAY_TO_LIST(rootpartdesc->oids, rootpartdesc->nparts, all_parts);
+ foreach(lc, all_parts)
+ {
+ Relation partrel = heap_open(lfirst_oid(lc), lockmode);
+ PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+
+ /*
+ * If this partition is a partitioned table, add its children to the
+ * end of the list, so that they are processed as well.
+ */
+ if (partdesc)
+ {
+ (*num_parted)++;
+ parted_rels = lappend(parted_rels, partrel);
+ OID_ARRAY_TO_LIST(partdesc->oids, partdesc->nparts, all_parts);
+ }
+ else
+ heap_close(partrel, NoLock);
+
+ /*
+ * We keep the partitioned ones open until we're done using the
+ * information being collected here (for example, see
+ * ExecEndModifyTable).
+ */
+ }
+
+ /* Generate PartitionDispatch objects for all partitioned tables */
+ pd = (PartitionDispatchData **) palloc(*num_parted *
+ sizeof(PartitionDispatchData *));
+ *leaf_part_oids = NIL;
+ i = k = 0;
+ foreach(lc, parted_rels)
+ {
+ Relation partrel = lfirst(lc);
+ PartitionKey partkey = RelationGetPartitionKey(partrel);
+ PartitionDesc partdesc = RelationGetPartitionDesc(partrel);
+ int j,
+ m;
+
+ pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
+ pd[i]->reldesc = partrel;
+ pd[i]->key = partkey;
+ pd[i]->keystate = NIL;
+ pd[i]->partdesc = partdesc;
+ pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
+
+ m = 0;
+ for (j = 0; j < partdesc->nparts; j++)
+ {
+ Oid partrelid = partdesc->oids[j];
+
+ if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
+ {
+ *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
+ pd[i]->indexes[j] = k++;
+ }
+ else
+ {
+ /*
+ * We can assign indexes this way because of the way
+ * parted_rels has been generated.
+ */
+ pd[i]->indexes[j] = -(i + 1 + m);
+ m++;
+ }
+ }
+ i++;
+ }
+
+ return pd;
+}
+
+/* Module-local functions */
+
+/*
+ * get_qual_for_list
+ *
+ * Returns a list of expressions to use as a list partition's constraint.
+ */
+static List *
+get_qual_for_list(PartitionKey key, PartitionBoundSpec *spec)
+{
+ List *result;
+ ArrayExpr *arr;
+ ScalarArrayOpExpr *opexpr;
+ ListCell *cell,
+ *prev,
+ *next;
+ Node *keyCol;
+ Oid operoid;
+ bool need_relabel,
+ list_has_null = false;
+ NullTest *nulltest1 = NULL,
+ *nulltest2 = NULL;
+
+ /* Left operand is either a simple Var or arbitrary expression */
+ if (key->partattrs[0] != 0)
+ keyCol = (Node *) makeVar(1,
+ key->partattrs[0],
+ key->parttypid[0],
+ key->parttypmod[0],
+ key->parttypcoll[0],
+ 0);
+ else
+ keyCol = (Node *) copyObject(linitial(key->partexprs));
+
+ /*
+ * We must remove any NULL value in the list; we handle it separately
+ * below.
+ */
+ prev = NULL;
+ for (cell = list_head(spec->listdatums); cell; cell = next)
+ {
+ Const *val = (Const *) lfirst(cell);
+
+ next = lnext(cell);
+
+ if (val->constisnull)
+ {
+ list_has_null = true;
+ spec->listdatums = list_delete_cell(spec->listdatums,
+ cell, prev);
+ }
+ else
+ prev = cell;
+ }
+
+ if (!list_has_null)
+ {
+ /*
+ * Gin up a col IS NOT NULL test that will be AND'd with other
+ * expressions
+ */
+ nulltest1 = makeNode(NullTest);
+ nulltest1->arg = (Expr *) keyCol;
+ nulltest1->nulltesttype = IS_NOT_NULL;
+ nulltest1->argisrow = false;
+ nulltest1->location = -1;
+ }
+ else
+ {
+ /*
+ * Gin up a col IS NULL test that will be OR'd with other expressions
+ */
+ nulltest2 = makeNode(NullTest);
+ nulltest2->arg = (Expr *) keyCol;
+ nulltest2->nulltesttype = IS_NULL;
+ nulltest2->argisrow = false;
+ nulltest2->location = -1;
+ }
+
+ /* Right operand is an ArrayExpr containing this partition's values */
+ arr = makeNode(ArrayExpr);
+ arr->array_typeid = !type_is_array(key->parttypid[0])
+ ? get_array_type(key->parttypid[0])
+ : key->parttypid[0];
+ arr->array_collid = key->parttypcoll[0];
+ arr->element_typeid = key->parttypid[0];
+ arr->elements = spec->listdatums;
+ arr->multidims = false;
+ arr->location = -1;
+
+ /* Get the correct btree equality operator */
+ operoid = get_partition_operator(key, 0, BTEqualStrategyNumber,
+ &need_relabel);
+ if (need_relabel || key->partcollation[0] != key->parttypcoll[0])
+ keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+ key->partopcintype[0],
+ -1,
+ key->partcollation[0],
+ COERCE_EXPLICIT_CAST);
+
+ /* Build leftop = ANY (rightop) */
+ opexpr = makeNode(ScalarArrayOpExpr);
+ opexpr->opno = operoid;
+ opexpr->opfuncid = get_opcode(operoid);
+ opexpr->useOr = true;
+ opexpr->inputcollid = key->partcollation[0];
+ opexpr->args = list_make2(keyCol, arr);
+ opexpr->location = -1;
+
+ if (nulltest1)
+ result = list_make2(nulltest1, opexpr);
+ else if (nulltest2)
+ {
+ Expr *or;
+
+ or = makeBoolExpr(OR_EXPR, list_make2(nulltest2, opexpr), -1);
+ result = list_make1(or);
+ }
+ else
+ result = list_make1(opexpr);
+
+ return result;
+}
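
Stripped of the Node machinery, the qual built here has one of two shapes, depending on whether the partition's value list includes NULL. The sketch below approximates them with plain C booleans (SQL's three-valued logic is flattened here, and in_list merely plays the part of keyCol = ANY(ARRAY[...])):

#include <stdbool.h>
#include <stdio.h>

static bool
in_list(int v, const int *vals, int n)
{
	for (int i = 0; i < n; i++)
		if (vals[i] == v)
			return true;
	return false;
}

/* Shape 1: (keyCol IS NOT NULL) AND (keyCol = ANY(vals)) */
static bool
qual_without_null(bool isnull, int v, const int *vals, int n)
{
	return !isnull && in_list(v, vals, n);
}

/* Shape 2: (keyCol IS NULL) OR (keyCol = ANY(vals)) */
static bool
qual_with_null(bool isnull, int v, const int *vals, int n)
{
	return isnull || in_list(v, vals, n);
}

int
main(void)
{
	int		vals[] = {2, 8};

	printf("%d %d %d\n",
		   qual_without_null(true, 0, vals, 2),		/* 0 */
		   qual_with_null(true, 0, vals, 2),		/* 1 */
		   qual_with_null(false, 8, vals, 2));		/* 1 */
	return 0;
}
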
+
+/*
+ * get_qual_for_range
+ *
+ * Get a list of OpExpr's to use as a range partition's constraint.
+ */
+static List *
+get_qual_for_range(PartitionKey key, PartitionBoundSpec *spec)
+{
+ List *result = NIL;
+ ListCell *cell1,
+ *cell2,
+ *partexprs_item;
+ int i;
+
+ /*
+ * Iterate over columns of the key, emitting an OpExpr for each using the
+ * corresponding lower and upper datums as constant operands.
+ */
+ i = 0;
+ partexprs_item = list_head(key->partexprs);
+ forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+ {
+ PartitionRangeDatum *ldatum = lfirst(cell1),
+ *udatum = lfirst(cell2);
+ Node *keyCol;
+ Const *lower_val = NULL,
+ *upper_val = NULL;
+ EState *estate;
+ MemoryContext oldcxt;
+ Expr *test_expr;
+ ExprState *test_exprstate;
+ Datum test_result;
+ bool isNull;
+ bool need_relabel = false;
+ Oid operoid;
+ NullTest *nulltest;
+
+ /* Left operand */
+ if (key->partattrs[i] != 0)
+ {
+ keyCol = (Node *) makeVar(1,
+ key->partattrs[i],
+ key->parttypid[i],
+ key->parttypmod[i],
+ key->parttypcoll[i],
+ 0);
+ }
+ else
+ {
+ keyCol = (Node *) copyObject(lfirst(partexprs_item));
+ partexprs_item = lnext(partexprs_item);
+ }
+
+ /*
+		 * Emit an IS NOT NULL expression for non-Var keys, because whereas
+		 * simple attributes are covered by NOT NULL constraints, expression
+		 * keys are still nullable, which is not acceptable in the case of
+		 * range partitioning.
+ */
+ if (!IsA(keyCol, Var))
+ {
+ nulltest = makeNode(NullTest);
+ nulltest->arg = (Expr *) keyCol;
+ nulltest->nulltesttype = IS_NOT_NULL;
+ nulltest->argisrow = false;
+ nulltest->location = -1;
+ result = lappend(result, nulltest);
+ }
+
+ /*
+		 * Stop at this column if either the lower or upper datum is infinite,
+ * but do emit an OpExpr for the non-infinite datum.
+ */
+ if (!ldatum->infinite)
+ lower_val = (Const *) ldatum->value;
+ if (!udatum->infinite)
+ upper_val = (Const *) udatum->value;
+
+ /*
+ * If lower_val and upper_val are both finite and happen to be equal,
+ * emit only (keyCol = lower_val) for this column, because all rows in
+ * this partition could only ever contain this value (ie, lower_val)
+ * in the current partitioning column. We must consider further
+ * columns because the above condition does not fully constrain the
+ * rows of this partition.
+ */
+ if (lower_val && upper_val)
+ {
+ /* Get the correct btree equality operator for the test */
+ operoid = get_partition_operator(key, i, BTEqualStrategyNumber,
+ &need_relabel);
+
+ /* Create the test expression */
+ estate = CreateExecutorState();
+ oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
+ test_expr = make_opclause(operoid,
+ BOOLOID,
+ false,
+ (Expr *) lower_val,
+ (Expr *) upper_val,
+ InvalidOid,
+ key->partcollation[i]);
+ fix_opfuncids((Node *) test_expr);
+ test_exprstate = ExecInitExpr(test_expr, NULL);
+ test_result = ExecEvalExprSwitchContext(test_exprstate,
+ GetPerTupleExprContext(estate),
+ &isNull, NULL);
+ MemoryContextSwitchTo(oldcxt);
+ FreeExecutorState(estate);
+
+ if (DatumGetBool(test_result))
+ {
+ /* This can never be, but it's better to make sure */
+ if (i == key->partnatts - 1)
+ elog(ERROR, "invalid range bound specification");
+
+ if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
+ keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+ key->partopcintype[i],
+ -1,
+ key->partcollation[i],
+ COERCE_EXPLICIT_CAST);
+ result = lappend(result,
+ make_opclause(operoid,
+ BOOLOID,
+ false,
+ (Expr *) keyCol,
+ (Expr *) lower_val,
+ InvalidOid,
+ key->partcollation[i]));
+
+ /* Go over to consider the next column. */
+ i++;
+ continue;
+ }
+ }
+
+ /*
+ * We can say here that lower_val != upper_val. Emit expressions
+ * (keyCol >= lower_val) and (keyCol < upper_val), then stop.
+ */
+ if (lower_val)
+ {
+ operoid = get_partition_operator(key, i,
+ BTGreaterEqualStrategyNumber,
+ &need_relabel);
+
+ if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
+ keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+ key->partopcintype[i],
+ -1,
+ key->partcollation[i],
+ COERCE_EXPLICIT_CAST);
+ result = lappend(result,
+ make_opclause(operoid,
+ BOOLOID,
+ false,
+ (Expr *) keyCol,
+ (Expr *) lower_val,
+ InvalidOid,
+ key->partcollation[i]));
+ }
+
+ if (upper_val)
+ {
+ operoid = get_partition_operator(key, i,
+ BTLessStrategyNumber,
+ &need_relabel);
+
+ if (need_relabel || key->partcollation[i] != key->parttypcoll[i])
+ keyCol = (Node *) makeRelabelType((Expr *) keyCol,
+ key->partopcintype[i],
+ -1,
+ key->partcollation[i],
+ COERCE_EXPLICIT_CAST);
+
+ result = lappend(result,
+ make_opclause(operoid,
+ BOOLOID,
+ false,
+ (Expr *) keyCol,
+ (Expr *) upper_val,
+ InvalidOid,
+ key->partcollation[i]));
+ }
+
+ /*
+ * We can stop at this column, because we would not have checked the
+ * next column when routing a given row into this partition.
+ */
+ break;
+ }
+
+ return result;
+}
+
+/*
+ * get_partition_operator
+ *
+ * Return oid of the operator of given strategy for a given partition key
+ * column.
+ */
+static Oid
+get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
+ bool *need_relabel)
+{
+ Oid operoid;
+
+ /*
+ * First check if there exists an operator of the given strategy, with
+ * this column's type as both its lefttype and righttype, in the
+ * partitioning operator family specified for the column.
+ */
+ operoid = get_opfamily_member(key->partopfamily[col],
+ key->parttypid[col],
+ key->parttypid[col],
+ strategy);
+
+ /*
+ * If one doesn't exist, we must resort to using an operator in the same
+	 * operator family but with the operator class's declared input type. It is
+ * OK to do so, because the column's type is known to be binary-coercible
+ * with the operator class input type (otherwise, the operator class in
+ * question would not have been accepted as the partitioning operator
+ * class). We must however inform the caller to wrap the non-Const
+ * expression with a RelabelType node to denote the implicit coercion. It
+ * ensures that the resulting expression structurally matches similarly
+ * processed expressions within the optimizer.
+ */
+ if (!OidIsValid(operoid))
+ {
+ operoid = get_opfamily_member(key->partopfamily[col],
+ key->partopcintype[col],
+ key->partopcintype[col],
+ strategy);
+ *need_relabel = true;
+ }
+ else
+ *need_relabel = false;
+
+ if (!OidIsValid(operoid))
+ elog(ERROR, "could not find operator for partitioning");
+
+ return operoid;
+}
+
+/*
+ * generate_partition_qual
+ *
+ * Generate partition predicate from rel's partition bound expression
+ *
+ * The result expression tree is stored in CacheMemoryContext to ensure it
+ * survives as long as the relcache entry.  But we should be running in a
+ * shorter-lived
+ * working context. To avoid leaking cache memory if this routine fails partway
+ * through, we build in working memory and then copy the completed structure
+ * into cache memory.
+ */
+static List *
+generate_partition_qual(Relation rel, bool recurse)
+{
+ HeapTuple tuple;
+ MemoryContext oldcxt;
+ Datum boundDatum;
+ bool isnull;
+ Node *bound;
+ List *my_qual = NIL,
+ *result = NIL;
+ Relation parent;
+
+ /* Guard against stack overflow due to overly deep partition tree */
+ check_stack_depth();
+
+ /* Grab at least an AccessShareLock on the parent table */
+ parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
+ AccessShareLock);
+
+ /* Quick copy */
+ if (rel->rd_partcheck)
+ {
+ if (parent->rd_rel->relispartition && recurse)
+ result = list_concat(generate_partition_qual(parent, true),
+ copyObject(rel->rd_partcheck));
+ else
+ result = copyObject(rel->rd_partcheck);
+
+ heap_close(parent, AccessShareLock);
+ return result;
+ }
+
+ /* Get pg_class.relpartbound */
+ if (!rel->rd_rel->relispartition) /* should not happen */
+ elog(ERROR, "relation \"%s\" has relispartition = false",
+ RelationGetRelationName(rel));
+ tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
+ boundDatum = SysCacheGetAttr(RELOID, tuple,
+ Anum_pg_class_relpartbound,
+ &isnull);
+ if (isnull) /* should not happen */
+ elog(ERROR, "relation \"%s\" has relpartbound = null",
+ RelationGetRelationName(rel));
+ bound = stringToNode(TextDatumGetCString(boundDatum));
+ ReleaseSysCache(tuple);
+
+ my_qual = get_qual_from_partbound(rel, parent, bound);
+
+ /* If requested, add parent's quals to the list (if any) */
+ if (parent->rd_rel->relispartition && recurse)
+ {
+ List *parent_check;
+
+ parent_check = generate_partition_qual(parent, true);
+ result = list_concat(parent_check, my_qual);
+ }
+ else
+ result = my_qual;
+
+ /* Save a copy of my_qual in the relcache */
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ rel->rd_partcheck = copyObject(my_qual);
+ MemoryContextSwitchTo(oldcxt);
+
+ /* Keep the parent locked until commit */
+ heap_close(parent, NoLock);
+
+ return result;
+}
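
The discipline in the header comment is a general one: build where failure is cheap to clean up, then copy the finished object into long-lived storage in one step. A minimal standalone sketch, with a stack buffer playing the working context and the heap playing CacheMemoryContext (build_qual_cached is invented for illustration):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
build_qual_cached(void)
{
	char	scratch[64];		/* "working memory": reclaimed automatically */

	/* An error exit before the copy leaks nothing long-lived. */
	if (snprintf(scratch, sizeof(scratch),
				 "(a >= %d) AND (a < %d)", 10, 20) >= (int) sizeof(scratch))
		return NULL;

	return strdup(scratch);		/* one allocation in the "cache" */
}

int
main(void)
{
	char	   *qual = build_qual_cached();

	if (qual)
	{
		printf("cached qual: %s\n", qual);
		free(qual);
	}
	return 0;
}
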
+
+/* ----------------
+ * FormPartitionKeyDatum
+ * Construct values[] and isnull[] arrays for the partition key
+ * of a tuple.
+ *
+ *	pd		PartitionDispatch object for the relation
+ * slot Heap tuple from which to extract partition key
+ * estate executor state for evaluating any partition key
+ * expressions (must be non-NULL)
+ * values Array of partition key Datums (output area)
+ * isnull Array of is-null indicators (output area)
+ *
+ * the ecxt_scantuple slot of estate's per-tuple expr context must point to
+ * the heap tuple passed in.
+ * ----------------
+ */
+static void
+FormPartitionKeyDatum(PartitionDispatch pd,
+ TupleTableSlot *slot,
+ EState *estate,
+ Datum *values,
+ bool *isnull)
+{
+ ListCell *partexpr_item;
+ int i;
+
+ if (pd->key->partexprs != NIL && pd->keystate == NIL)
+ {
+ /* Check caller has set up context correctly */
+ Assert(estate != NULL &&
+ GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
+
+ /* First time through, set up expression evaluation state */
+ pd->keystate = (List *) ExecPrepareExpr((Expr *) pd->key->partexprs,
+ estate);
+ }
+
+ partexpr_item = list_head(pd->keystate);
+ for (i = 0; i < pd->key->partnatts; i++)
+ {
+ AttrNumber keycol = pd->key->partattrs[i];
+ Datum datum;
+ bool isNull;
+
+ if (keycol != 0)
+ {
+ /* Plain column; get the value directly from the heap tuple */
+ datum = slot_getattr(slot, keycol, &isNull);
+ }
+ else
+ {
+ /* Expression; need to evaluate it */
+ if (partexpr_item == NULL)
+ elog(ERROR, "wrong number of partition key expressions");
+ datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
+ GetPerTupleExprContext(estate),
+ &isNull,
+ NULL);
+ partexpr_item = lnext(partexpr_item);
+ }
+ values[i] = datum;
+ isnull[i] = isNull;
+ }
+
+ if (partexpr_item != NULL)
+ elog(ERROR, "wrong number of partition key expressions");
+}
+
+/*
+ * get_partition_for_tuple
+ * Finds a leaf partition for tuple contained in *slot
+ *
+ * Returned value is the sequence number of the leaf partition thus found,
+ * or -1 if no leaf partition is found for the tuple. *failed_at is set
+ * to the OID of the partitioned table whose partition was not found in
+ * the latter case.
+ */
+int
+get_partition_for_tuple(PartitionDispatch *pd,
+ TupleTableSlot *slot,
+ EState *estate,
+ Oid *failed_at)
+{
+ PartitionDispatch parent;
+ Datum values[PARTITION_MAX_KEYS];
+ bool isnull[PARTITION_MAX_KEYS];
+ int cur_offset,
+ cur_index;
+ int i;
+
+ /* start with the root partitioned table */
+ parent = pd[0];
+ while (true)
+ {
+ PartitionKey key = parent->key;
+ PartitionDesc partdesc = parent->partdesc;
+
+ /* Quick exit */
+ if (partdesc->nparts == 0)
+ {
+ *failed_at = RelationGetRelid(parent->reldesc);
+ return -1;
+ }
+
+ /* Extract partition key from tuple */
+ FormPartitionKeyDatum(parent, slot, estate, values, isnull);
+
+ if (key->strategy == PARTITION_STRATEGY_RANGE)
+ {
+ /* Disallow nulls in the range partition key of the tuple */
+ for (i = 0; i < key->partnatts; i++)
+ if (isnull[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("range partition key of row contains null")));
+ }
+
+ if (partdesc->boundinfo->has_null && isnull[0])
+ /* Tuple maps to the null-accepting list partition */
+ cur_index = partdesc->boundinfo->null_index;
+ else
+ {
+ /* Else bsearch in partdesc->boundinfo */
+ bool equal = false;
+
+ cur_offset = partition_bound_bsearch(key, partdesc->boundinfo,
+ values, false, &equal);
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ if (cur_offset >= 0 && equal)
+ cur_index = partdesc->boundinfo->indexes[cur_offset];
+ else
+ cur_index = -1;
+ break;
+
+ case PARTITION_STRATEGY_RANGE:
+
+ /*
+						 * The offset returned is such that the bound at that
+						 * offset is less than or equal to the tuple.  So the
+						 * bound at offset+1 would be the upper bound.
+ */
+ cur_index = partdesc->boundinfo->indexes[cur_offset + 1];
+ break;
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+ }
+
+ /*
+ * cur_index < 0 means we failed to find a partition of this parent.
+ * cur_index >= 0 means we either found the leaf partition, or the
+ * next parent to find a partition of.
+ */
+ if (cur_index < 0)
+ {
+ *failed_at = RelationGetRelid(parent->reldesc);
+ return -1;
+ }
+ else if (parent->indexes[cur_index] < 0)
+ parent = pd[-parent->indexes[cur_index]];
+ else
+ break;
+ }
+
+ return parent->indexes[cur_index];
+}
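
The descent loop depends on the sign convention established in RelationGetPartitionDispatchInfo: a non-negative entry in indexes[] is a leaf's sequence number, while a negative entry is the negated position of the next PartitionDispatch in the flat pd[] array. A toy walk over a two-level tree (invented Node type; hard-coded routing choices stand in for the bound search):

#include <stdio.h>

typedef struct Node
{
	int		indexes[2];			/* two partitions per level, for brevity */
} Node;

int
main(void)
{
	/* pd[0] is the root; its second slot descends into pd[1]. */
	Node	pd[2] = {
		{{0, -1}},				/* leaf 0, or descend to pd[1] */
		{{1, 2}}				/* two leaves */
	};
	int		route[] = {1, 0};	/* slot chosen by the key at each level */
	Node   *parent = &pd[0];
	int		cur_index;
	int		level = 0;

	for (;;)
	{
		cur_index = parent->indexes[route[level++]];
		if (cur_index >= 0)
			break;				/* found a leaf */
		parent = &pd[-cur_index];
	}
	printf("tuple routed to leaf %d\n", cur_index);		/* leaf 1 */
	return 0;
}
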
+
+/*
+ * qsort_partition_list_value_cmp
+ *
+ * Compare two list partition bound datums
+ */
+static int32
+qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
+{
+ Datum val1 = (*(const PartitionListValue **) a)->value,
+ val2 = (*(const PartitionListValue **) b)->value;
+ PartitionKey key = (PartitionKey) arg;
+
+ return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+ key->partcollation[0],
+ val1, val2));
+}
+
+/*
+ * make_one_range_bound
+ *
+ * Return a PartitionRangeBound given a list of PartitionRangeDatum elements
+ * and a flag telling whether the bound is a lower bound. This is a
+ * separate function because multiple call sites need it.
+ */
+static PartitionRangeBound *
+make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
+{
+ PartitionRangeBound *bound;
+ ListCell *cell;
+ int i;
+
+ bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
+ bound->index = index;
+ bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
+ bound->content = (RangeDatumContent *) palloc0(key->partnatts *
+ sizeof(RangeDatumContent));
+ bound->lower = lower;
+
+ i = 0;
+ foreach(cell, datums)
+ {
+ PartitionRangeDatum *datum = lfirst(cell);
+
+ /* What's contained in this range datum? */
+ bound->content[i] = !datum->infinite
+ ? RANGE_DATUM_FINITE
+ : (lower ? RANGE_DATUM_NEG_INF
+ : RANGE_DATUM_POS_INF);
+
+ if (bound->content[i] == RANGE_DATUM_FINITE)
+ {
+ Const *val = (Const *) datum->value;
+
+ if (val->constisnull)
+ elog(ERROR, "invalid range bound datum");
+ bound->datums[i] = val->constvalue;
+ }
+
+ i++;
+ }
+
+ return bound;
+}
+
+/* Used when sorting range bounds across all range partitions */
+static int32
+qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
+{
+ PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
+ PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
+ PartitionKey key = (PartitionKey) arg;
+
+ return partition_rbound_cmp(key, b1->datums, b1->content, b1->lower, b2);
+}
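
Both comparators above follow the three-argument shape required by qsort_arg(), PostgreSQL's qsort variant that threads a caller-supplied pointer (here the PartitionKey, carrying the comparison support functions and collations) into each comparison. A minimal usage sketch with a plain int payload; the prototype shown is assumed to match the one in port.h:

#include <stddef.h>

/* Assumed declaration, as in PostgreSQL's port.h. */
extern void qsort_arg(void *base, size_t nel, size_t elsize,
                      int (*cmp) (const void *, const void *, void *),
                      void *arg);

/* The extra 'arg' replaces what would otherwise be global state. */
static int
cmp_int_with_direction(const void *a, const void *b, void *arg)
{
    int         av = *(const int *) a;
    int         bv = *(const int *) b;
    int         dir = *(int *) arg;     /* +1 ascending, -1 descending */

    return dir * ((av > bv) - (av < bv));
}

/* usage (sketch):
 *   int dir = -1;
 *   qsort_arg(values, nvalues, sizeof(int), cmp_int_with_direction, &dir);
 */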
+
+/*
+ * partition_rbound_cmp
+ *
+ * Return whether the first of two range bounds (specified by datums1,
+ * content1, and lower1) is <, =, or > the bound specified in *b2
+ */
+static int32
+partition_rbound_cmp(PartitionKey key,
+ Datum *datums1, RangeDatumContent *content1, bool lower1,
+ PartitionRangeBound *b2)
+{
+ int32 cmpval;
+ int i;
+ Datum *datums2 = b2->datums;
+ RangeDatumContent *content2 = b2->content;
+ bool lower2 = b2->lower;
+
+ for (i = 0; i < key->partnatts; i++)
+ {
+ /*
+ * First, handle cases involving infinity, which don't require
+ * invoking the comparison proc.
+ */
+ if (content1[i] != RANGE_DATUM_FINITE &&
+ content2[i] != RANGE_DATUM_FINITE)
+
+ /*
+ * Both are infinity, so they are equal unless one is negative
+ * infinity and the other positive (or vice versa).
+ */
+ return content1[i] == content2[i] ? 0
+ : (content1[i] < content2[i] ? -1 : 1);
+ else if (content1[i] != RANGE_DATUM_FINITE)
+ return content1[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
+ else if (content2[i] != RANGE_DATUM_FINITE)
+ return content2[i] == RANGE_DATUM_NEG_INF ? 1 : -1;
+
+ cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
+ key->partcollation[i],
+ datums1[i],
+ datums2[i]));
+ if (cmpval != 0)
+ break;
+ }
+
+ /*
+ * If the comparison is anything other than equal, we're done. If they
+ * compare equal, though, we still have to consider whether the boundaries
+ * are inclusive or exclusive, the exclusive one being considered the
+ * smaller of the two.
+ */
+ if (cmpval == 0 && lower1 != lower2)
+ cmpval = lower1 ? 1 : -1;
+
+ return cmpval;
+}
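
The lower1 != lower2 tie-break deserves a worked example. Take partitions p1 FOR VALUES FROM (1) TO (10) and p2 FOR VALUES FROM (10) TO (20): the upper bound of p1 and the lower bound of p2 both carry the datum 10, yet they must sort in that order for the bound array to stay non-overlapping. A self-contained sketch of just the tie-break rule:

#include <stdbool.h>

/* After the datums compare equal, an exclusive (upper) bound sorts
 * before an inclusive (lower) bound with the same value, so
 * upper(p1) = 10 precedes lower(p2) = 10 in the sorted bound array. */
static int
rbound_tie_break(int cmpval, bool lower1, bool lower2)
{
    if (cmpval == 0 && lower1 != lower2)
        cmpval = lower1 ? 1 : -1;
    return cmpval;
}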
+
+/*
+ * partition_rbound_datum_cmp
+ *
+ * Return whether the range bound (specified in rb_datums and rb_content)
+ * is <, =, or > the partition key of the tuple (tuple_datums)
+ */
+static int32
+partition_rbound_datum_cmp(PartitionKey key,
+ Datum *rb_datums, RangeDatumContent *rb_content,
+ Datum *tuple_datums)
+{
+ int i;
+ int32 cmpval = -1;
+
+ for (i = 0; i < key->partnatts; i++)
+ {
+ if (rb_content[i] != RANGE_DATUM_FINITE)
+ return rb_content[i] == RANGE_DATUM_NEG_INF ? -1 : 1;
+
+ cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[i],
+ key->partcollation[i],
+ rb_datums[i],
+ tuple_datums[i]));
+ if (cmpval != 0)
+ break;
+ }
+
+ return cmpval;
+}
+
+/*
+ * partition_bound_cmp
+ *
+ * Return whether the bound at the given offset in boundinfo is <, =, or >
+ * the argument specified in *probe.
+ */
+static int32
+partition_bound_cmp(PartitionKey key, PartitionBoundInfo boundinfo,
+ int offset, void *probe, bool probe_is_bound)
+{
+ Datum *bound_datums = boundinfo->datums[offset];
+ int32 cmpval = -1;
+
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ cmpval = DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+ key->partcollation[0],
+ bound_datums[0],
+ *(Datum *) probe));
+ break;
+
+ case PARTITION_STRATEGY_RANGE:
+ {
+ RangeDatumContent *content = boundinfo->content[offset];
+
+ if (probe_is_bound)
+ {
+ /*
+ * We need to pass whether the existing bound is a lower
+ * bound, so that two equal-valued lower and upper bounds
+ * are not regarded as equal.
+ */
+ bool lower = boundinfo->indexes[offset] < 0;
+
+ cmpval = partition_rbound_cmp(key,
+ bound_datums, content, lower,
+ (PartitionRangeBound *) probe);
+ }
+ else
+ cmpval = partition_rbound_datum_cmp(key,
+ bound_datums, content,
+ (Datum *) probe);
+ break;
+ }
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ }
+
+ return cmpval;
+}
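
partition_bound_cmp() treats its void *probe as a tagged union, with probe_is_bound as the tag: a list bound compares against a single Datum, a range bound against a whole PartitionRangeBound. The same idea written out as an explicit type, purely for illustration (this struct is not in the patch):

#include <stdint.h>

typedef uintptr_t Datum;                  /* stand-in for the real Datum */
struct PartitionRangeBound;               /* opaque here */

typedef enum ProbeKind
{
    PROBE_TUPLE_KEY,                      /* probe_is_bound == false */
    PROBE_RANGE_BOUND                     /* probe_is_bound == true */
} ProbeKind;

typedef struct Probe
{
    ProbeKind   kind;
    union
    {
        Datum      *tuple_datums;         /* a tuple's partition key */
        struct PartitionRangeBound *bound;      /* an existing bound */
    }           u;
} Probe;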
+
+/*
+ * Binary search on a collection of partition bounds. Returns the greatest
+ * index of a bound in the array boundinfo->datums that is less than or
+ * equal to *probe. If all bounds in the array are greater than *probe,
+ * -1 is returned.
+ *
+ * *probe could either be a partition bound or a Datum array representing
+ * the partition key of a tuple being routed; probe_is_bound tells which.
+ * We pass that down to the comparison function so that it can interpret the
+ * contents of *probe accordingly.
+ *
+ * *is_equal is set to whether the bound at the returned index is equal to
+ * *probe.
+ */
+static int
+partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo,
+ void *probe, bool probe_is_bound, bool *is_equal)
+{
+ int lo,
+ hi,
+ mid;
+
+ lo = -1;
+ hi = boundinfo->ndatums - 1;
+ while (lo < hi)
+ {
+ int32 cmpval;
+
+ mid = (lo + hi + 1) / 2;
+ cmpval = partition_bound_cmp(key, boundinfo, mid, probe,
+ probe_is_bound);
+ if (cmpval <= 0)
+ {
+ lo = mid;
+ *is_equal = (cmpval == 0);
+ }
+ else
+ hi = mid - 1;
+ }
+
+ return lo;
+}
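
The search above is the "greatest index whose element is <= probe" variant of binary search: lo starts at -1 (nothing accepted yet), and the midpoint rounds up so that lo = mid always makes progress. The same loop over a plain int array, self-contained:

#include <stdbool.h>

/* Returns the greatest index i with vals[i] <= probe, or -1 if every
 * element is greater than probe; *is_equal reports whether the element
 * at the returned index compares equal to probe. */
static int
greatest_le(const int *vals, int nvals, int probe, bool *is_equal)
{
    int         lo = -1;
    int         hi = nvals - 1;

    *is_equal = false;
    while (lo < hi)
    {
        int         mid = (lo + hi + 1) / 2;    /* round up: lo = mid
                                                 * must advance */

        if (vals[mid] <= probe)
        {
            lo = mid;
            *is_equal = (vals[mid] == probe);
        }
        else
            hi = mid - 1;
    }
    return lo;
}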
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 8fabe6899f..724b41e64c 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -368,7 +368,7 @@ CreateConstraintEntry(const char *constraintName,
*/
recordDependencyOnSingleRelExpr(&conobject, conExpr, relId,
DEPENDENCY_NORMAL,
- DEPENDENCY_NORMAL);
+ DEPENDENCY_NORMAL, false);
}
/* Post creation hook for new constraint */
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index c617abb223..f4afcd9aae 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -201,7 +201,8 @@ analyze_rel(Oid relid, RangeVar *relation, int options,
* locked the relation.
*/
if (onerel->rd_rel->relkind == RELKIND_RELATION ||
- onerel->rd_rel->relkind == RELKIND_MATVIEW)
+ onerel->rd_rel->relkind == RELKIND_MATVIEW ||
+ onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/* Regular table, so we'll use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
@@ -1317,7 +1318,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
/* Check table type (MATVIEW can't happen, but might as well allow) */
if (childrel->rd_rel->relkind == RELKIND_RELATION ||
- childrel->rd_rel->relkind == RELKIND_MATVIEW)
+ childrel->rd_rel->relkind == RELKIND_MATVIEW ||
+ childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/* Regular table, so use the regular row acquisition function */
acquirefunc = acquire_sample_rows;
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index ec5d6f1565..270be0af18 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -161,6 +161,11 @@ typedef struct CopyStateData
ExprState **defexprs; /* array of default att expressions */
bool volatile_defexprs; /* is any of defexprs volatile? */
List *range_table;
+ PartitionDispatch *partition_dispatch_info;
+ int num_dispatch;
+ int num_partitions;
+ ResultRelInfo *partitions;
+ TupleConversionMap **partition_tupconv_maps;
/*
* These variables are used to reduce overhead in textual COPY FROM.
@@ -1397,6 +1402,71 @@ BeginCopy(ParseState *pstate,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("table \"%s\" does not have OIDs",
RelationGetRelationName(cstate->rel))));
+
+ /*
+ * Initialize state for CopyFrom tuple routing. Watch out for
+ * any foreign partitions.
+ */
+ if (is_from && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ PartitionDispatch *pd;
+ List *leaf_parts;
+ ListCell *cell;
+ int i,
+ num_parted,
+ num_leaf_parts;
+ ResultRelInfo *leaf_part_rri;
+
+ /* Get the tuple-routing information and lock partitions */
+ pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+ &num_parted, &leaf_parts);
+ num_leaf_parts = list_length(leaf_parts);
+ cstate->partition_dispatch_info = pd;
+ cstate->num_dispatch = num_parted;
+ cstate->num_partitions = num_leaf_parts;
+ cstate->partitions = (ResultRelInfo *) palloc(num_leaf_parts *
+ sizeof(ResultRelInfo));
+ cstate->partition_tupconv_maps = (TupleConversionMap **)
+ palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+ leaf_part_rri = cstate->partitions;
+ i = 0;
+ foreach(cell, leaf_parts)
+ {
+ Relation partrel;
+
+ /*
+ * We locked all the partitions above including the leaf
+ * partitions. Note that each of the relations in
+ * cstate->partitions will be closed by CopyFrom() after
+ * it's finished with its processing.
+ */
+ partrel = heap_open(lfirst_oid(cell), NoLock);
+
+ /*
+ * Verify result relation is a valid target for the current
+ * operation.
+ */
+ CheckValidResultRel(partrel, CMD_INSERT);
+
+ InitResultRelInfo(leaf_part_rri,
+ partrel,
+ 1, /* dummy */
+ false, /* no partition constraint check */
+ 0);
+
+ /* Open partition indices */
+ ExecOpenIndices(leaf_part_rri, false);
+
+ if (!equalTupleDescs(tupDesc, RelationGetDescr(partrel)))
+ cstate->partition_tupconv_maps[i] =
+ convert_tuples_by_name(tupDesc,
+ RelationGetDescr(partrel),
+ gettext_noop("could not convert row type"));
+ leaf_part_rri++;
+ i++;
+ }
+ }
}
else
{
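
A conversion map is built above only when a leaf's tuple descriptor differs from the parent's (after dropped or reordered columns, for instance); a NULL entry in partition_tupconv_maps means rows can be inserted as-is. A minimal sketch of the lookup-and-convert step this sets up, assuming the usual tupconvert.h API:

#include "postgres.h"
#include "access/tupconvert.h"

/* Sketch only: convert a parent-rowtype tuple to the leaf's rowtype
 * when a conversion map was built, else use the tuple unchanged. */
static HeapTuple
convert_for_partition(HeapTuple tuple, TupleConversionMap *map)
{
    return (map != NULL) ? do_convert_tuple(tuple, map) : tuple;
}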
@@ -1751,6 +1821,12 @@ BeginCopyTo(ParseState *pstate,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy from sequence \"%s\"",
RelationGetRelationName(rel))));
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy from partitioned table \"%s\"",
+ RelationGetRelationName(rel)),
+ errhint("Try the COPY (SELECT ...) TO variant.")));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -2249,6 +2325,7 @@ CopyFrom(CopyState cstate)
Datum *values;
bool *nulls;
ResultRelInfo *resultRelInfo;
+ ResultRelInfo *saved_resultRelInfo = NULL;
EState *estate = CreateExecutorState(); /* for ExecConstraints() */
ExprContext *econtext;
TupleTableSlot *myslot;
@@ -2275,6 +2352,7 @@ CopyFrom(CopyState cstate)
* only hint about them in the view case.)
*/
if (cstate->rel->rd_rel->relkind != RELKIND_RELATION &&
+ cstate->rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
!(cstate->rel->trigdesc &&
cstate->rel->trigdesc->trig_insert_instead_row))
{
@@ -2385,6 +2463,7 @@ CopyFrom(CopyState cstate)
InitResultRelInfo(resultRelInfo,
cstate->rel,
1, /* dummy rangetable index */
+ true, /* do load partition check expression */
0);
ExecOpenIndices(resultRelInfo, false);
@@ -2407,11 +2486,13 @@ CopyFrom(CopyState cstate)
* BEFORE/INSTEAD OF triggers, or we need to evaluate volatile default
* expressions. Such triggers or expressions might query the table we're
* inserting to, and act differently if the tuples that have already been
- * processed and prepared for insertion are not there.
+ * processed and prepared for insertion are not there. We also can't
+ * do it if the table is partitioned.
*/
if ((resultRelInfo->ri_TrigDesc != NULL &&
(resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
+ cstate->partition_dispatch_info != NULL ||
cstate->volatile_defexprs)
{
useHeapMultiInsert = false;
@@ -2488,6 +2569,59 @@ CopyFrom(CopyState cstate)
slot = myslot;
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
+ /* Determine the partition to heap_insert the tuple into */
+ if (cstate->partition_dispatch_info)
+ {
+ int leaf_part_index;
+ TupleConversionMap *map;
+
+ /*
+ * Away we go ... If we end up not finding a partition after all,
+ * ExecFindPartition() does not return and errors out instead.
+ * Otherwise, the returned value is to be used as an index into the
+ * arrays cstate->partitions[] and cstate->partition_tupconv_maps[],
+ * which give us the ResultRelInfo and TupleConversionMap for the
+ * partition, respectively.
+ */
+ leaf_part_index = ExecFindPartition(resultRelInfo,
+ cstate->partition_dispatch_info,
+ slot,
+ estate);
+ Assert(leaf_part_index >= 0 &&
+ leaf_part_index < cstate->num_partitions);
+
+ /*
+ * Save the old ResultRelInfo and switch to the one corresponding
+ * to the selected partition.
+ */
+ saved_resultRelInfo = resultRelInfo;
+ resultRelInfo = cstate->partitions + leaf_part_index;
+
+ /* We do not yet have a way to insert into a foreign partition */
+ if (resultRelInfo->ri_FdwRoutine)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot route inserted tuples to a foreign table")));
+
+ /*
+ * For ExecInsertIndexTuples() to work on the partition's indexes
+ */
+ estate->es_result_relation_info = resultRelInfo;
+
+ /*
+ * We might need to convert from the parent rowtype to the
+ * partition rowtype.
+ */
+ map = cstate->partition_tupconv_maps[leaf_part_index];
+ if (map)
+ {
+ tuple = do_convert_tuple(tuple, map);
+ ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+ }
+
+ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+ }
+
skip_tuple = false;
/* BEFORE ROW INSERT Triggers */
@@ -2513,7 +2647,8 @@ CopyFrom(CopyState cstate)
else
{
/* Check the constraints of the tuple */
- if (cstate->rel->rd_att->constr)
+ if (cstate->rel->rd_att->constr ||
+ resultRelInfo->ri_PartitionCheck)
ExecConstraints(resultRelInfo, slot, estate);
if (useHeapMultiInsert)
@@ -2546,7 +2681,8 @@ CopyFrom(CopyState cstate)
List *recheckIndexes = NIL;
/* OK, store the tuple and create index entries for it */
- heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
+ heap_insert(resultRelInfo->ri_RelationDesc, tuple, mycid,
+ hi_options, bistate);
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot,
@@ -2570,6 +2706,12 @@ CopyFrom(CopyState cstate)
* tuples inserted by an INSERT command.
*/
processed++;
+
+ if (saved_resultRelInfo)
+ {
+ resultRelInfo = saved_resultRelInfo;
+ estate->es_result_relation_info = resultRelInfo;
+ }
}
}
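
One detail worth calling out in the loop above: the root's ResultRelInfo is stashed in saved_resultRelInfo, the leaf's is installed in both resultRelInfo and estate->es_result_relation_info for the duration of one row, and both are restored before the next iteration. In outline, as a commented sketch (names as in the patch):

/* Per-row discipline in CopyFrom(), reduced to its shape:
 *
 *   saved_resultRelInfo = resultRelInfo;          -- remember the root
 *   resultRelInfo = cstate->partitions + leaf;    -- switch to the leaf
 *   estate->es_result_relation_info = resultRelInfo;
 *   ... fire triggers, check constraints, heap_insert() into the leaf ...
 *   resultRelInfo = saved_resultRelInfo;          -- restore the root
 *   estate->es_result_relation_info = resultRelInfo;
 */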
@@ -2607,6 +2749,32 @@ CopyFrom(CopyState cstate)
ExecCloseIndices(resultRelInfo);
+ /* Close all the partitioned tables, leaf partitions, and their indices */
+ if (cstate->partition_dispatch_info)
+ {
+ int i;
+
+ /*
+ * Remember cstate->partition_dispatch_info[0] corresponds to the root
+ * partitioned table, which we must not try to close, because it is
+ * the main target table of COPY that will be closed eventually by
+ * DoCopy().
+ */
+ for (i = 1; i < cstate->num_dispatch; i++)
+ {
+ PartitionDispatch pd = cstate->partition_dispatch_info[i];
+
+ heap_close(pd->reldesc, NoLock);
+ }
+ for (i = 0; i < cstate->num_partitions; i++)
+ {
+ ResultRelInfo *resultRelInfo = cstate->partitions + i;
+
+ ExecCloseIndices(resultRelInfo);
+ heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+ }
+ }
+
FreeExecutorState(estate);
/*
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 5b4f6affcc..d6d52d9929 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -112,7 +112,7 @@ create_ctas_internal(List *attrList, IntoClause *into)
* Create the relation. (This will error out if there's an existing view,
* so we don't need more code to complain if "replace" is false.)
*/
- intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL);
+ intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);
/*
* If necessary, create a TOAST table for the target table. Note that
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 85817c6530..eeb2b1fe80 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -69,8 +69,6 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo,
char *accessMethodName, Oid accessMethodId,
bool amcanorder,
bool isconstraint);
-static Oid GetIndexOpClass(List *opclass, Oid attrType,
- char *accessMethodName, Oid accessMethodId);
static char *ChooseIndexName(const char *tabname, Oid namespaceId,
List *colnames, List *exclusionOpNames,
bool primary, bool isconstraint);
@@ -383,6 +381,11 @@ DefineIndex(Oid relationId,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot create index on foreign table \"%s\"",
RelationGetRelationName(rel))));
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create index on partitioned table \"%s\"",
+ RelationGetRelationName(rel))));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -1145,10 +1148,10 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
/*
* Identify the opclass to use.
*/
- classOidP[attn] = GetIndexOpClass(attribute->opclass,
- atttype,
- accessMethodName,
- accessMethodId);
+ classOidP[attn] = ResolveOpClass(attribute->opclass,
+ atttype,
+ accessMethodName,
+ accessMethodId);
/*
* Identify the exclusion operator, if any.
@@ -1255,10 +1258,13 @@ ComputeIndexAttrs(IndexInfo *indexInfo,
/*
* Resolve possibly-defaulted operator class specification
+ *
+ * Note: This is used to resolve operator class specifications in index and
+ * partition key definitions.
*/
-static Oid
-GetIndexOpClass(List *opclass, Oid attrType,
- char *accessMethodName, Oid accessMethodId)
+Oid
+ResolveOpClass(List *opclass, Oid attrType,
+ char *accessMethodName, Oid accessMethodId)
{
char *schemaname;
char *opcname;
diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c
index a0c0d75977..9e62e00b8d 100644
--- a/src/backend/commands/lockcmds.c
+++ b/src/backend/commands/lockcmds.c
@@ -87,7 +87,7 @@ RangeVarCallbackForLockTable(const RangeVar *rv, Oid relid, Oid oldrelid,
* check */
/* Currently, we only allow plain tables to be locked */
- if (relkind != RELKIND_RELATION)
+ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
diff --git a/src/backend/commands/policy.c b/src/backend/commands/policy.c
index 70e22c1000..6da3205c9e 100644
--- a/src/backend/commands/policy.c
+++ b/src/backend/commands/policy.c
@@ -88,7 +88,7 @@ RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid,
rv->relname)));
/* Relation type MUST be a table. */
- if (relkind != RELKIND_RELATION)
+ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table", rv->relname)));
@@ -384,7 +384,8 @@ RemovePolicyById(Oid policy_id)
relid = ((Form_pg_policy) GETSTRUCT(tuple))->polrelid;
rel = heap_open(relid, AccessExclusiveLock);
- if (rel->rd_rel->relkind != RELKIND_RELATION)
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c
index 5bd7e124c1..2b0ae34830 100644
--- a/src/backend/commands/seclabel.c
+++ b/src/backend/commands/seclabel.c
@@ -110,7 +110,8 @@ ExecSecLabelStmt(SecLabelStmt *stmt)
relation->rd_rel->relkind != RELKIND_VIEW &&
relation->rd_rel->relkind != RELKIND_MATVIEW &&
relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
- relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table",
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 7e37108b8d..d953b4408b 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -234,7 +234,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
stmt->tablespacename = NULL;
stmt->if_not_exists = seq->if_not_exists;
- address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL);
+ address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
seqoid = address.objectId;
Assert(seqoid != InvalidOid);
@@ -1475,7 +1475,8 @@ process_owned_by(Relation seqrel, List *owned_by)
/* Must be a regular or foreign table */
if (!(tablerel->rd_rel->relkind == RELKIND_RELATION ||
- tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+ tablerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+ tablerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("referenced relation \"%s\" is not a table or foreign table",
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 6322fa75a7..c77b216d4f 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -29,6 +29,7 @@
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
+#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
@@ -65,6 +66,9 @@
#include "nodes/parsenodes.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
+#include "optimizer/predtest.h"
+#include "optimizer/prep.h"
+#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
@@ -162,6 +166,7 @@ typedef struct AlteredTableInfo
Oid newTableSpace; /* new tablespace; 0 means no change */
bool chgPersistence; /* T if SET LOGGED/UNLOGGED is used */
char newrelpersistence; /* if above is true */
+ List *partition_constraint; /* for attach partition validation */
/* Objects to rebuild after completing ALTER TYPE operations */
List *changedConstraintOids; /* OIDs of constraints to rebuild */
List *changedConstraintDefs; /* string definitions of same */
@@ -252,6 +257,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = {
gettext_noop("foreign table \"%s\" does not exist, skipping"),
gettext_noop("\"%s\" is not a foreign table"),
gettext_noop("Use DROP FOREIGN TABLE to remove a foreign table.")},
+ {RELKIND_PARTITIONED_TABLE,
+ ERRCODE_UNDEFINED_TABLE,
+ gettext_noop("table \"%s\" does not exist"),
+ gettext_noop("table \"%s\" does not exist, skipping"),
+ gettext_noop("\"%s\" is not a table"),
+ gettext_noop("Use DROP TABLE to remove a table.")},
{'\0', 0, NULL, NULL, NULL, NULL}
};
@@ -272,7 +283,8 @@ struct DropRelationCallbackState
static void truncate_check_rel(Relation rel);
static List *MergeAttributes(List *schema, List *supers, char relpersistence,
- List **supOids, List **supconstr, int *supOidCount);
+ bool is_partition, List **supOids, List **supconstr,
+ int *supOidCount);
static bool MergeCheckConstraint(List *constraints, char *name, Node *expr);
static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel);
static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel);
@@ -339,7 +351,9 @@ static void add_column_datatype_dependency(Oid relid, int32 attnum, Oid typid);
static void add_column_collation_dependency(Oid relid, int32 attnum, Oid collid);
static void ATPrepAddOids(List **wqueue, Relation rel, bool recurse,
AlterTableCmd *cmd, LOCKMODE lockmode);
+static void ATPrepDropNotNull(Relation rel, bool recurse, bool recursing);
static ObjectAddress ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode);
+static void ATPrepSetNotNull(Relation rel, bool recurse, bool recursing);
static ObjectAddress ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
const char *colName, LOCKMODE lockmode);
static ObjectAddress ATExecColumnDefault(Relation rel, const char *colName,
@@ -433,6 +447,15 @@ static void RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid,
Oid oldRelOid, void *arg);
static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
Oid oldrelid, void *arg);
+static bool is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr);
+static PartitionSpec *transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy);
+static void ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs,
+ List **partexprs, Oid *partopclass, Oid *partcollation);
+static void CreateInheritance(Relation child_rel, Relation parent_rel);
+static void RemoveInheritance(Relation child_rel, Relation parent_rel);
+static ObjectAddress ATExecAttachPartition(List **wqueue, Relation rel,
+ PartitionCmd *cmd);
+static ObjectAddress ATExecDetachPartition(Relation rel, RangeVar *name);
/* ----------------------------------------------------------------
@@ -455,7 +478,7 @@ static void RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid,
*/
ObjectAddress
DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
- ObjectAddress *typaddress)
+ ObjectAddress *typaddress, const char *queryString)
{
char relname[NAMEDATALEN];
Oid namespaceId;
@@ -492,6 +515,14 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("ON COMMIT can only be used on temporary tables")));
+ if (stmt->partspec != NULL)
+ {
+ if (relkind != RELKIND_RELATION)
+ elog(ERROR, "unexpected relkind: %d", (int) relkind);
+
+ relkind = RELKIND_PARTITIONED_TABLE;
+ }
+
/*
* Look up the namespace in which we are supposed to create the relation,
* check we have permission to create there, lock it against concurrent
@@ -578,6 +609,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
*/
schema = MergeAttributes(schema, stmt->inhRelations,
stmt->relation->relpersistence,
+ stmt->partbound != NULL,
&inheritOids, &old_constraints, &parentOidCount);
/*
@@ -588,17 +620,33 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
descriptor = BuildDescForRelation(schema);
/*
- * Notice that we allow OIDs here only for plain tables, even though some
- * other relkinds can support them. This is necessary because the
- * default_with_oids GUC must apply only to plain tables and not any other
- * relkind; doing otherwise would break existing pg_dump files. We could
- * allow explicit "WITH OIDS" while not allowing default_with_oids to
- * affect other relkinds, but it would complicate interpretOidsOption().
+ * Notice that we allow OIDs here only for plain tables and partitioned
+ * tables, even though some other relkinds can support them. This is
+ * necessary because the default_with_oids GUC must apply only to plain
+ * tables and not any other relkind; doing otherwise would break existing
+ * pg_dump files. We could allow explicit "WITH OIDS" while not allowing
+ * default_with_oids to affect other relkinds, but it would complicate
+ * interpretOidsOption().
*/
localHasOids = interpretOidsOption(stmt->options,
- (relkind == RELKIND_RELATION));
+ (relkind == RELKIND_RELATION ||
+ relkind == RELKIND_PARTITIONED_TABLE));
descriptor->tdhasoid = (localHasOids || parentOidCount > 0);
+ if (stmt->partbound)
+ {
+ /* If the parent has OIDs, partitions must have them too. */
+ if (parentOidCount > 0 && !localHasOids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create table without OIDs as partition of table with OIDs")));
+ /* If the parent doesn't, partitions must not have them. */
+ if (parentOidCount == 0 && localHasOids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot create table with OIDs as partition of table without OIDs")));
+ }
+
/*
* Find columns with default values and prepare for insertion of the
* defaults. Pre-cooked (that is, inherited) defaults go into a list of
@@ -697,6 +745,110 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
*/
rel = relation_open(relationId, AccessExclusiveLock);
+ /* Process and store partition bound, if any. */
+ if (stmt->partbound)
+ {
+ Node *bound;
+ ParseState *pstate;
+ Oid parentId = linitial_oid(inheritOids);
+ Relation parent;
+
+ /* Already have strong enough lock on the parent */
+ parent = heap_open(parentId, NoLock);
+
+ /*
+ * We are going to try to validate the partition bound specification
+ * against the partition key of parentRel, so it better have one.
+ */
+ if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("\"%s\" is not partitioned",
+ RelationGetRelationName(parent))));
+
+ /* Transform the bound values */
+ pstate = make_parsestate(NULL);
+ pstate->p_sourcetext = queryString;
+ bound = transformPartitionBound(pstate, parent, stmt->partbound);
+
+ /*
+ * First check that the new partition's bound is valid and does not
+ * overlap with any existing partition of the parent; note that
+ * check_new_partition_bound() does not return on error.
+ */
+ check_new_partition_bound(relname, parent, bound);
+ heap_close(parent, NoLock);
+
+ /* Update the pg_class entry. */
+ StorePartitionBound(rel, bound);
+
+ /*
+ * The code that follows may also update the pg_class tuple to update
+ * relchecks, so bump up the command counter to avoid the "already
+ * updated by self" error.
+ */
+ CommandCounterIncrement();
+ }
+
+ /*
+ * Process the partitioning specification (if any) and store the
+ * partition key information into the catalog.
+ */
+ if (stmt->partspec)
+ {
+ char strategy;
+ int partnatts,
+ i;
+ AttrNumber partattrs[PARTITION_MAX_KEYS];
+ Oid partopclass[PARTITION_MAX_KEYS];
+ Oid partcollation[PARTITION_MAX_KEYS];
+ List *partexprs = NIL;
+ List *cmds = NIL;
+
+ /*
+ * We need to transform the raw parsetrees corresponding to partition
+ * expressions into executable expression trees. As with column
+ * defaults and CHECK constraints, the transformation could not have
+ * been done any earlier.
+ */
+ stmt->partspec = transformPartitionSpec(rel, stmt->partspec,
+ &strategy);
+ ComputePartitionAttrs(rel, stmt->partspec->partParams,
+ partattrs, &partexprs, partopclass,
+ partcollation);
+
+ partnatts = list_length(stmt->partspec->partParams);
+ StorePartitionKey(rel, strategy, partnatts, partattrs, partexprs,
+ partopclass, partcollation);
+
+ /* Force key columns to be NOT NULL when using range partitioning */
+ if (strategy == PARTITION_STRATEGY_RANGE)
+ {
+ for (i = 0; i < partnatts; i++)
+ {
+ AttrNumber partattno = partattrs[i];
+ Form_pg_attribute attform = descriptor->attrs[partattno-1];
+
+ if (partattno != 0 && !attform->attnotnull)
+ {
+ /* Add a subcommand to make this one NOT NULL */
+ AlterTableCmd *cmd = makeNode(AlterTableCmd);
+
+ cmd->subtype = AT_SetNotNull;
+ cmd->name = pstrdup(NameStr(attform->attname));
+ cmds = lappend(cmds, cmd);
+ }
+ }
+
+ /*
+ * Although there cannot be any partitions yet, we still need to
+ * pass true for recurse; ATPrepSetNotNull() complains if we don't.
+ */
+ if (cmds != NIL)
+ AlterTableInternal(RelationGetRelid(rel), cmds, true);
+ }
+ }
+
/*
* Now add any newly specified column default values and CHECK constraints
* to the new relation. These are passed to us in the form of raw
@@ -927,6 +1079,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
HeapTuple tuple;
struct DropRelationCallbackState *state;
char relkind;
+ char expected_relkind;
Form_pg_class classform;
LOCKMODE heap_lockmode;
@@ -955,7 +1108,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
return; /* concurrently dropped, so nothing to do */
classform = (Form_pg_class) GETSTRUCT(tuple);
- if (classform->relkind != relkind)
+ /*
+ * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
+ * but RemoveRelations() can only pass one relkind for a given relation.
+ * It chooses RELKIND_RELATION for both regular and partitioned tables.
+ * That means we must be careful not to give a wrong-type error when
+ * the relation is actually RELKIND_PARTITIONED_TABLE.
+ */
+ if (classform->relkind == RELKIND_PARTITIONED_TABLE)
+ expected_relkind = RELKIND_RELATION;
+ else
+ expected_relkind = classform->relkind;
+
+ if (relkind != expected_relkind)
DropErrorMsgWrongType(rel->relname, classform->relkind, relkind);
/* Allow DROP to either table owner or schema owner */
@@ -1054,6 +1219,10 @@ ExecuteTruncate(TruncateStmt *stmt)
relids = lappend_oid(relids, childrelid);
}
}
+ else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("must truncate child tables too")));
}
/*
@@ -1153,6 +1322,7 @@ ExecuteTruncate(TruncateStmt *stmt)
InitResultRelInfo(resultRelInfo,
rel,
0, /* dummy rangetable index */
+ false,
0);
resultRelInfo++;
}
@@ -1293,7 +1463,8 @@ truncate_check_rel(Relation rel)
AclResult aclresult;
/* Only allow truncate on regular tables */
- if (rel->rd_rel->relkind != RELKIND_RELATION)
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table",
@@ -1359,6 +1530,7 @@ storage_name(char c)
* of ColumnDef's.) It is destructively changed.
* 'supers' is a list of names (as RangeVar nodes) of parent relations.
* 'relpersistence' is a persistence type of the table.
+ * 'is_partition' tells whether the table is a partition
*
* Output arguments:
* 'supOids' receives a list of the OIDs of the parent relations.
@@ -1410,7 +1582,8 @@ storage_name(char c)
*/
static List *
MergeAttributes(List *schema, List *supers, char relpersistence,
- List **supOids, List **supconstr, int *supOidCount)
+ bool is_partition, List **supOids, List **supconstr,
+ int *supOidCount)
{
ListCell *entry;
List *inhSchema = NIL;
@@ -1420,6 +1593,7 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
bool have_bogus_defaults = false;
int child_attno;
static Node bogus_marker = {0}; /* marks conflicting defaults */
+ List *saved_schema = NIL;
/*
* Check for and reject tables with too many columns. We perform this
@@ -1438,6 +1612,17 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
errmsg("tables can have at most %d columns",
MaxHeapAttributeNumber)));
+ /*
+ * In case of a partition, there are no new column definitions, only
+ * dummy ColumnDefs created for column constraints. We merge these
+ * constraints inherited from the parent.
+ */
+ if (is_partition)
+ {
+ saved_schema = schema;
+ schema = NIL;
+ }
+
/*
* Check for duplicate names in the explicit list of attributes.
*
@@ -1518,11 +1703,35 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
* on the parent table, which might otherwise be attempting to clear
* the parent's relhassubclass field, if its previous children were
* recently dropped.
+ *
+ * If the child table is a partition, then we instead grab an exclusive
+ * lock on the parent because its partition descriptor will be changed
+ * by addition of the new partition.
+ */
+ if (!is_partition)
+ relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+ else
+ relation = heap_openrv(parent, AccessExclusiveLock);
+
+ /*
+ * We do not allow partitioned tables and partitions to participate
+ * in regular inheritance.
*/
- relation = heap_openrv(parent, ShareUpdateExclusiveLock);
+ if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ !is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partitioned table \"%s\"",
+ parent->relname)));
+ if (relation->rd_rel->relispartition && !is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partition \"%s\"",
+ parent->relname)));
if (relation->rd_rel->relkind != RELKIND_RELATION &&
- relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("inherited relation \"%s\" is not a table or foreign table",
@@ -1532,7 +1741,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot inherit from temporary relation \"%s\"",
+ errmsg(!is_partition
+ ? "cannot inherit from temporary relation \"%s\""
+ : "cannot create a permanent relation as partition of temporary relation \"%s\"",
parent->relname)));
/* If existing rel is temp, it must belong to this session */
@@ -1540,7 +1751,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
!relation->rd_islocaltemp)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot inherit from temporary relation of another session")));
+ errmsg(!is_partition
+ ? "cannot inherit from temporary relation of another session"
+ : "cannot create as partition of temporary relation of another session")));
/*
* We should have an UNDER permission flag for this, but for now,
@@ -1777,9 +1990,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
pfree(newattno);
/*
- * Close the parent rel, but keep our ShareUpdateExclusiveLock on it
- * until xact commit. That will prevent someone else from deleting or
- * ALTERing the parent before the child is committed.
+ * Close the parent rel, but keep our lock on it until xact commit.
+ * That will prevent someone else from deleting or ALTERing the parent
+ * before the child is committed.
*/
heap_close(relation, NoLock);
}
@@ -1787,7 +2000,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
/*
* If we had no inherited attributes, the result schema is just the
* explicitly declared columns. Otherwise, we need to merge the declared
- * columns into the inherited schema list.
+ * columns into the inherited schema list. Note that we never have any
+ * explicitly declared columns if the table is a partition.
*/
if (inhSchema != NIL)
{
@@ -1815,6 +2029,12 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
Oid defcollid,
newcollid;
+ /*
+ * Partitions have only one parent, so a conflict should never
+ * occur.
+ */
+ Assert(!is_partition);
+
/*
* Yes, try to merge the two column definitions. They must
* have the same type, typmod, and collation.
@@ -1896,6 +2116,56 @@ MergeAttributes(List *schema, List *supers, char relpersistence,
MaxHeapAttributeNumber)));
}
+ /*
+ * Now that we have the column definition list for a partition, we can
+ * check whether the columns referenced in column option specifications
+ * actually exist. Also, we merge the options into the corresponding
+ * column definitions.
+ */
+ if (is_partition && list_length(saved_schema) > 0)
+ {
+ schema = list_concat(schema, saved_schema);
+
+ foreach(entry, schema)
+ {
+ ColumnDef *coldef = lfirst(entry);
+ ListCell *rest = lnext(entry);
+ ListCell *prev = entry;
+
+ /*
+ * A column option here that does not match any column inherited from
+ * the parent is an error. This check works because the columns from
+ * the parent come first in the list (see above).
+ */
+ if (coldef->typeName == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ coldef->colname)));
+ while (rest != NULL)
+ {
+ ColumnDef *restdef = lfirst(rest);
+ ListCell *next = lnext(rest); /* need to save it in case
+ * we delete it */
+
+ if (strcmp(coldef->colname, restdef->colname) == 0)
+ {
+ /*
+ * merge the column options into the column from the
+ * parent
+ */
+ coldef->is_not_null = restdef->is_not_null;
+ coldef->raw_default = restdef->raw_default;
+ coldef->cooked_default = restdef->cooked_default;
+ coldef->constraints = restdef->constraints;
+ list_delete_cell(schema, rest, prev);
+ }
+ prev = rest;
+ rest = next;
+ }
+ }
+ }
+
/*
* If we found any conflicting parent default values, check to make sure
* they were overridden by the child.
@@ -2166,7 +2436,8 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing)
relkind != RELKIND_MATVIEW &&
relkind != RELKIND_COMPOSITE_TYPE &&
relkind != RELKIND_INDEX &&
- relkind != RELKIND_FOREIGN_TABLE)
+ relkind != RELKIND_FOREIGN_TABLE &&
+ relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, composite type, index, or foreign table",
@@ -3057,6 +3328,11 @@ AlterTableGetLockLevel(List *cmds)
cmd_lockmode = AlterTableGetRelOptionsLockLevel((List *) cmd->def);
break;
+ case AT_AttachPartition:
+ case AT_DetachPartition:
+ cmd_lockmode = AccessExclusiveLock;
+ break;
+
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -3168,12 +3444,14 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
break;
case AT_DropNotNull: /* ALTER COLUMN DROP NOT NULL */
ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATPrepDropNotNull(rel, recurse, recursing);
ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
/* No command-specific prep needed */
pass = AT_PASS_DROP;
break;
case AT_SetNotNull: /* ALTER COLUMN SET NOT NULL */
ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ ATPrepSetNotNull(rel, recurse, recursing);
ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
/* No command-specific prep needed */
pass = AT_PASS_ADD_CONSTR;
@@ -3374,6 +3652,12 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
/* No command-specific prep needed */
pass = AT_PASS_MISC;
break;
+ case AT_AttachPartition:
+ case AT_DetachPartition:
+ ATSimplePermissions(rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_MISC;
+ break;
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -3444,7 +3728,14 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode)
{
AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
- if (tab->relkind == RELKIND_RELATION ||
+ /*
+ * If the table is the source table of an ATTACH PARTITION command,
+ * we did not modify anything about it that would change its toasting
+ * requirement, so there is no need to check.
+ */
+ if (((tab->relkind == RELKIND_RELATION ||
+ tab->relkind == RELKIND_PARTITIONED_TABLE) &&
+ tab->partition_constraint == NIL) ||
tab->relkind == RELKIND_MATVIEW)
AlterTableCreateToastTable(tab->relid, (Datum) 0, lockmode);
}
@@ -3693,6 +3984,12 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
case AT_GenericOptions:
ATExecGenericOptions(rel, (List *) cmd->def);
break;
+ case AT_AttachPartition:
+ ATExecAttachPartition(wqueue, rel, (PartitionCmd *) cmd->def);
+ break;
+ case AT_DetachPartition:
+ ATExecDetachPartition(rel, ((PartitionCmd *) cmd->def)->name);
+ break;
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -3878,7 +4175,8 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode)
* Test the current data within the table against new constraints
* generated by ALTER TABLE commands, but don't rebuild data.
*/
- if (tab->constraints != NIL || tab->new_notnull)
+ if (tab->constraints != NIL || tab->new_notnull ||
+ tab->partition_constraint != NIL)
ATRewriteTable(tab, InvalidOid, lockmode);
/*
@@ -3958,6 +4256,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
CommandId mycid;
BulkInsertState bistate;
int hi_options;
+ List *partqualstate = NIL;
/*
* Open the relation(s). We have surely already locked the existing
@@ -4022,6 +4321,15 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
}
}
+ /* Build expression execution states for partition check quals */
+ if (tab->partition_constraint)
+ {
+ needscan = true;
+ partqualstate = (List *)
+ ExecPrepareExpr((Expr *) tab->partition_constraint,
+ estate);
+ }
+
foreach(l, tab->newvals)
{
NewColumnValue *ex = lfirst(l);
@@ -4211,6 +4519,11 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
}
}
+ if (partqualstate && !ExecQual(partqualstate, econtext, true))
+ ereport(ERROR,
+ (errcode(ERRCODE_CHECK_VIOLATION),
+ errmsg("partition constraint is violated by some row")));
+
/* Write the tuple out to the new relation */
if (newrel)
heap_insert(newrel, tuple, mycid, hi_options, bistate);
@@ -4291,6 +4604,7 @@ ATSimplePermissions(Relation rel, int allowed_targets)
switch (rel->rd_rel->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
actual_target = ATT_TABLE;
break;
case RELKIND_VIEW:
@@ -4407,7 +4721,8 @@ ATSimpleRecursion(List **wqueue, Relation rel,
*/
if (recurse &&
(rel->rd_rel->relkind == RELKIND_RELATION ||
- rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE))
+ rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE))
{
Oid relid = RelationGetRelid(rel);
ListCell *child;
@@ -4527,7 +4842,8 @@ find_composite_type_dependencies(Oid typeOid, Relation origRelation,
att = rel->rd_att->attrs[pg_depend->objsubid - 1];
if (rel->rd_rel->relkind == RELKIND_RELATION ||
- rel->rd_rel->relkind == RELKIND_MATVIEW)
+ rel->rd_rel->relkind == RELKIND_MATVIEW ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
if (origTypeName)
ereport(ERROR,
@@ -4728,6 +5044,11 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
if (recursing)
ATSimplePermissions(rel, ATT_TABLE | ATT_FOREIGN_TABLE);
+ if (rel->rd_rel->relispartition && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot add column to a partition")));
+
attrdesc = heap_open(AttributeRelationId, RowExclusiveLock);
/*
@@ -5174,6 +5495,20 @@ ATPrepAddOids(List **wqueue, Relation rel, bool recurse, AlterTableCmd *cmd, LOC
* Return the address of the modified column. If the column was already
* nullable, InvalidObjectAddress is returned.
*/
+
+static void
+ATPrepDropNotNull(Relation rel, bool recurse, bool recursing)
+{
+ /*
+ * If the parent is a partitioned table, then, as with CHECK constraints,
+ * NOT NULL constraints must be dropped from the child tables as well.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ !recurse && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("constraint must be dropped from child tables too")));
+}
static ObjectAddress
ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
{
@@ -5249,6 +5584,45 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
list_free(indexoidlist);
+ /* If rel is a partition, don't drop NOT NULL if the parent column has it */
+ if (rel->rd_rel->relispartition)
+ {
+ Oid parentId = get_partition_parent(RelationGetRelid(rel));
+ Relation parent = heap_open(parentId, AccessShareLock);
+ TupleDesc tupDesc = RelationGetDescr(parent);
+ AttrNumber parent_attnum;
+
+ parent_attnum = get_attnum(parentId, colName);
+ if (tupDesc->attrs[parent_attnum - 1]->attnotnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("column \"%s\" is marked NOT NULL in parent table",
+ colName)));
+ heap_close(parent, AccessShareLock);
+ }
+
+ /*
+ * If the table is a range partitioned table, check that the column
+ * is not in the partition key.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ PartitionKey key = RelationGetPartitionKey(rel);
+ int partnatts = get_partition_natts(key),
+ i;
+
+ for (i = 0; i < partnatts; i++)
+ {
+ AttrNumber partattnum = get_partition_col_attnum(key, i);
+
+ if (partattnum == attnum)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("column \"%s\" is in range partition key",
+ colName)));
+ }
+ }
+
/*
* Okay, actually perform the catalog change ... if needed
*/
@@ -5281,6 +5655,21 @@ ATExecDropNotNull(Relation rel, const char *colName, LOCKMODE lockmode)
* Return the address of the modified column. If the column was already NOT
* NULL, InvalidObjectAddress is returned.
*/
+
+static void
+ATPrepSetNotNull(Relation rel, bool recurse, bool recursing)
+{
+ /*
+ * If the parent is a partitioned table, then, as with CHECK constraints,
+ * NOT NULL constraints must be added to the child tables as well.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
+ !recurse && !recursing)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("constraint must be added to child tables too")));
+}
+
static ObjectAddress
ATExecSetNotNull(AlteredTableInfo *tab, Relation rel,
const char *colName, LOCKMODE lockmode)
@@ -5419,7 +5808,8 @@ ATPrepSetStatistics(Relation rel, const char *colName, Node *newValue, LOCKMODE
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_MATVIEW &&
rel->rd_rel->relkind != RELKIND_INDEX &&
- rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, materialized view, index, or foreign table",
@@ -5691,6 +6081,68 @@ ATPrepDropColumn(List **wqueue, Relation rel, bool recurse, bool recursing,
cmd->subtype = AT_DropColumnRecurse;
}
+/*
+ * Checks if attnum is a partition attribute for rel
+ *
+ * Sets *used_in_expr if attnum is found to be referenced in some partition
+ * key expression. It's possible for a column to be both used directly and
+ * as part of an expression; if that happens, *used_in_expr may end up as
+ * either true or false. That's OK for current uses of this function, because
+ * *used_in_expr is only used to tailor the error message text.
+ */
+static bool
+is_partition_attr(Relation rel, AttrNumber attnum, bool *used_in_expr)
+{
+ PartitionKey key;
+ int partnatts;
+ List *partexprs;
+ ListCell *partexprs_item;
+ int i;
+
+ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ return false;
+
+ key = RelationGetPartitionKey(rel);
+ partnatts = get_partition_natts(key);
+ partexprs = get_partition_exprs(key);
+
+ partexprs_item = list_head(partexprs);
+ for (i = 0; i < partnatts; i++)
+ {
+ AttrNumber partattno = get_partition_col_attnum(key, i);
+
+ if (partattno != 0)
+ {
+ if (attnum == partattno)
+ {
+ if (used_in_expr)
+ *used_in_expr = false;
+ return true;
+ }
+ }
+ else
+ {
+ /* Arbitrary expression */
+ Node *expr = (Node *) lfirst(partexprs_item);
+ Bitmapset *expr_attrs = NULL;
+
+ /* Find all attributes referenced */
+ pull_varattnos(expr, 1, &expr_attrs);
+ partexprs_item = lnext(partexprs_item);
+
+ if (bms_is_member(attnum - FirstLowInvalidHeapAttributeNumber,
+ expr_attrs))
+ {
+ if (used_in_expr)
+ *used_in_expr = true;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
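
The offset in the bms_is_member() call above follows pull_varattnos()'s convention: attribute numbers are shifted by FirstLowInvalidHeapAttributeNumber so that system columns, which have negative attnums, become valid (non-negative) bitmapset members. A self-contained sketch of the shift; the constant's value here is an assumed stand-in for the one defined in access/sysattr.h:

/* Stand-in for FirstLowInvalidHeapAttributeNumber. */
#define FIRST_LOW_INVALID_ATTNO (-8)

/* Map an attnum (possibly a negative system column) to a bitmapset
 * member index, the way pull_varattnos() stores it. */
static int
attno_to_bms_member(int attno)
{
    return attno - FIRST_LOW_INVALID_ATTNO;     /* user attno 1 -> 9 */
}

/* The membership test then mirrors the code above:
 *   bms_is_member(attno_to_bms_member(attnum), expr_attrs)
 */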
+
/*
* Return value is the address of the dropped column.
*/
@@ -5705,6 +6157,7 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
AttrNumber attnum;
List *children;
ObjectAddress object;
+ bool is_expr;
/* At top level, permission check was done in ATPrepCmd, else do it */
if (recursing)
@@ -5749,6 +6202,19 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
errmsg("cannot drop inherited column \"%s\"",
colName)));
+ /* Don't drop columns used in the partition key */
+ if (is_partition_attr(rel, attnum, &is_expr))
+ {
+ if (!is_expr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot drop column named in partition key")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot drop column referenced in partition key expression")));
+ }
+
ReleaseSysCache(tuple);
/*
@@ -5763,6 +6229,15 @@ ATExecDropColumn(List **wqueue, Relation rel, const char *colName,
Relation attr_rel;
ListCell *child;
+ /*
+ * In case of a partitioned table, the column must be dropped from the
+ * partitions as well.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && !recurse)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("column must be dropped from child tables too")));
+
attr_rel = heap_open(AttributeRelationId, RowExclusiveLock);
foreach(child, children)
{
@@ -6267,6 +6742,12 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel,
* Validity checks (permission checks wait till we have the column
* numbers)
*/
+ if (pkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot reference partitioned table \"%s\"",
+ RelationGetRelationName(pkrel))));
+
if (pkrel->rd_rel->relkind != RELKIND_RELATION)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -7776,6 +8257,16 @@ ATExecDropConstraint(Relation rel, const char *constrName,
}
}
+ /*
+ * In case of a partitioned table, the constraint must be dropped from
+ * the partitions too; there is no such thing as a NO INHERIT constraint
+ * on a partitioned table.
+ */
+ if (!recurse && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("constraint must be dropped from child tables too")));
+
/*
* Propagate to children as appropriate. Unlike most other ALTER
* routines, we have to do this one level of recursion at a time; we can't
@@ -7904,6 +8395,7 @@ ATPrepAlterColumnType(List **wqueue,
NewColumnValue *newval;
ParseState *pstate = make_parsestate(NULL);
AclResult aclresult;
+ bool is_expr;
if (rel->rd_rel->reloftype && !recursing)
ereport(ERROR,
@@ -7934,6 +8426,19 @@ ATPrepAlterColumnType(List **wqueue,
errmsg("cannot alter inherited column \"%s\"",
colName)));
+ /* Don't alter columns used in the partition key */
+ if (is_partition_attr(rel, attnum, &is_expr))
+ {
+ if (!is_expr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot alter type of column named in partition key")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("cannot alter type of column referenced in partition key expression")));
+ }
+
/* Look up the target type */
typenameTypeIdAndMod(NULL, typeName, &targettype, &targettypmod);
@@ -7949,7 +8454,8 @@ ATPrepAlterColumnType(List **wqueue,
list_make1_oid(rel->rd_rel->reltype),
false);
- if (tab->relkind == RELKIND_RELATION)
+ if (tab->relkind == RELKIND_RELATION ||
+ tab->relkind == RELKIND_PARTITIONED_TABLE)
{
/*
* Set up an expression to transform the old data value to the new
@@ -8979,6 +9485,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock
case RELKIND_VIEW:
case RELKIND_MATVIEW:
case RELKIND_FOREIGN_TABLE:
+ case RELKIND_PARTITIONED_TABLE:
/* ok to change owner */
break;
case RELKIND_INDEX:
@@ -9440,6 +9947,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation,
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
(void) heap_reloptions(rel->rd_rel->relkind, newOptions, true);
break;
case RELKIND_VIEW:
@@ -9860,7 +10368,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt)
/* Only move the object type requested */
if ((stmt->objtype == OBJECT_TABLE &&
- relForm->relkind != RELKIND_RELATION) ||
+ relForm->relkind != RELKIND_RELATION &&
+ relForm->relkind != RELKIND_PARTITIONED_TABLE) ||
(stmt->objtype == OBJECT_INDEX &&
relForm->relkind != RELKIND_INDEX) ||
(stmt->objtype == OBJECT_MATVIEW &&
@@ -10059,6 +10568,16 @@ ATPrepAddInherit(Relation child_rel)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot change inheritance of typed table")));
+
+ if (child_rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of a partition")));
+
+ if (child_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of partitioned table")));
}
/*
@@ -10067,12 +10586,7 @@ ATPrepAddInherit(Relation child_rel)
static ObjectAddress
ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
{
- Relation parent_rel,
- catalogRelation;
- SysScanDesc scan;
- ScanKeyData key;
- HeapTuple inheritsTuple;
- int32 inhseqno;
+ Relation parent_rel;
List *children;
ObjectAddress address;
@@ -10110,18 +10624,94 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot inherit to temporary relation of another session")));
+ /* Prevent partitioned tables from becoming inheritance parents */
+ if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from partitioned table \"%s\"",
+ parent->relname)));
+
+ /* Likewise for partitions */
+ if (parent_rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot inherit from a partition")));
+
/*
- * Check for duplicates in the list of parents, and determine the highest
- * inhseqno already present; we'll use the next one for the new parent.
- * (Note: get RowExclusiveLock because we will write pg_inherits below.)
+ * Prevent circularity by seeing if proposed parent inherits from child.
+ * (In particular, this disallows making a rel inherit from itself.)
*
- * Note: we do not reject the case where the child already inherits from
- * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
+ * This is not completely bulletproof because of race conditions: in
+ * multi-level inheritance trees, someone else could concurrently be
+ * making another inheritance link that closes the loop but does not join
+ * either of the rels we have locked. Preventing that seems to require
+ * exclusive locks on the entire inheritance tree, which is a cure worse
+ * than the disease. find_all_inheritors() will cope with circularity
+ * anyway, so don't sweat it too much.
+ *
+ * We use weakest lock we can on child's children, namely AccessShareLock.
*/
- catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock);
- ScanKeyInit(&key,
- Anum_pg_inherits_inhrelid,
- BTEqualStrategyNumber, F_OIDEQ,
+ children = find_all_inheritors(RelationGetRelid(child_rel),
+ AccessShareLock, NULL);
+
+ if (list_member_oid(children, RelationGetRelid(parent_rel)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("circular inheritance not allowed"),
+ errdetail("\"%s\" is already a child of \"%s\".",
+ parent->relname,
+ RelationGetRelationName(child_rel))));
+
+ /* If parent has OIDs then child must have OIDs */
+ if (parent_rel->rd_rel->relhasoids && !child_rel->rd_rel->relhasoids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("table \"%s\" without OIDs cannot inherit from table \"%s\" with OIDs",
+ RelationGetRelationName(child_rel),
+ RelationGetRelationName(parent_rel))));
+
+ /* OK to create inheritance */
+ CreateInheritance(child_rel, parent_rel);
+
+ ObjectAddressSet(address, RelationRelationId,
+ RelationGetRelid(parent_rel));
+
+ /* keep our lock on the parent relation until commit */
+ heap_close(parent_rel, NoLock);
+
+ return address;
+}
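A minimal SQL sketch of the behavior the checks above enforce, using
hypothetical table names (illustration only, not part of the patch):

    CREATE TABLE measurement (logdate date NOT NULL)
        PARTITION BY RANGE (logdate);
    CREATE TABLE plain (logdate date NOT NULL);

    ALTER TABLE plain INHERIT measurement;
    -- ERROR:  cannot inherit from partitioned table "measurement"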
+
+/*
+ * CreateInheritance
+ * Catalog manipulation portion of creating inheritance between a child
+ * table and a parent table.
+ *
+ * Common to ATExecAddInherit() and ATExecAttachPartition().
+ */
+static void
+CreateInheritance(Relation child_rel, Relation parent_rel)
+{
+ Relation catalogRelation;
+ SysScanDesc scan;
+ ScanKeyData key;
+ HeapTuple inheritsTuple;
+ int32 inhseqno;
+
+ /* Note: get RowExclusiveLock because we will write pg_inherits below. */
+ catalogRelation = heap_open(InheritsRelationId, RowExclusiveLock);
+
+ /*
+ * Check for duplicates in the list of parents, and determine the highest
+ * inhseqno already present; we'll use the next one for the new parent.
+ * Also, if proposed child is a partition, it cannot already be inheriting.
+ *
+ * Note: we do not reject the case where the child already inherits from
+ * the parent indirectly; CREATE TABLE doesn't reject comparable cases.
+ */
+ ScanKeyInit(&key,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(RelationGetRelid(child_rel)));
scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
true, NULL, 1, &key);
@@ -10137,44 +10727,12 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
(errcode(ERRCODE_DUPLICATE_TABLE),
errmsg("relation \"%s\" would be inherited from more than once",
RelationGetRelationName(parent_rel))));
+
if (inh->inhseqno > inhseqno)
inhseqno = inh->inhseqno;
}
systable_endscan(scan);
- /*
- * Prevent circularity by seeing if proposed parent inherits from child.
- * (In particular, this disallows making a rel inherit from itself.)
- *
- * This is not completely bulletproof because of race conditions: in
- * multi-level inheritance trees, someone else could concurrently be
- * making another inheritance link that closes the loop but does not join
- * either of the rels we have locked. Preventing that seems to require
- * exclusive locks on the entire inheritance tree, which is a cure worse
- * than the disease. find_all_inheritors() will cope with circularity
- * anyway, so don't sweat it too much.
- *
- * We use weakest lock we can on child's children, namely AccessShareLock.
- */
- children = find_all_inheritors(RelationGetRelid(child_rel),
- AccessShareLock, NULL);
-
- if (list_member_oid(children, RelationGetRelid(parent_rel)))
- ereport(ERROR,
- (errcode(ERRCODE_DUPLICATE_TABLE),
- errmsg("circular inheritance not allowed"),
- errdetail("\"%s\" is already a child of \"%s\".",
- parent->relname,
- RelationGetRelationName(child_rel))));
-
- /* If parent has OIDs then child must have OIDs */
- if (parent_rel->rd_rel->relhasoids && !child_rel->rd_rel->relhasoids)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("table \"%s\" without OIDs cannot inherit from table \"%s\" with OIDs",
- RelationGetRelationName(child_rel),
- RelationGetRelationName(parent_rel))));
-
/* Match up the columns and bump attinhcount as needed */
MergeAttributesIntoExisting(child_rel, parent_rel);
@@ -10189,16 +10747,8 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
inhseqno + 1,
catalogRelation);
- ObjectAddressSet(address, RelationRelationId,
- RelationGetRelid(parent_rel));
-
/* Now we're done with pg_inherits */
heap_close(catalogRelation, RowExclusiveLock);
-
- /* keep our lock on the parent relation until commit */
- heap_close(parent_rel, NoLock);
-
- return address;
}
/*
@@ -10249,7 +10799,7 @@ constraints_equivalent(HeapTuple a, HeapTuple b, TupleDesc tupleDesc)
* Check columns in child table match up with columns in parent, and increment
* their attinhcount.
*
- * Called by ATExecAddInherit
+ * Called by CreateInheritance
*
* Currently all parent columns must be found in child. Missing columns are an
* error. One day we might consider creating new columns like CREATE TABLE
@@ -10267,12 +10817,17 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
int parent_natts;
TupleDesc tupleDesc;
HeapTuple tuple;
+ bool child_is_partition = false;
attrrel = heap_open(AttributeRelationId, RowExclusiveLock);
tupleDesc = RelationGetDescr(parent_rel);
parent_natts = tupleDesc->natts;
+ /* If parent_rel is a partitioned table, child_rel must be a partition */
+ if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ child_is_partition = true;
+
for (parent_attno = 1; parent_attno <= parent_natts; parent_attno++)
{
Form_pg_attribute attribute = tupleDesc->attrs[parent_attno - 1];
@@ -10320,6 +10875,18 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
* later on, this change will just roll back.)
*/
childatt->attinhcount++;
+
+ /*
+ * In the case of partitions, we must enforce that the value of
+ * attislocal is the same in all partitions. (Note: partitions have
+ * only inherited attributes.)
+ */
+ if (child_is_partition)
+ {
+ Assert(childatt->attinhcount == 1);
+ childatt->attislocal = false;
+ }
+
simple_heap_update(attrrel, &tuple->t_self, tuple);
CatalogUpdateIndexes(attrrel, tuple);
heap_freetuple(tuple);
@@ -10342,7 +10909,7 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel)
*
* Constraints that are marked ONLY in the parent are ignored.
*
- * Called by ATExecAddInherit
+ * Called by CreateInheritance
*
* Currently all constraints in parent must be present in the child. One day we
* may consider adding new constraints like CREATE TABLE does.
@@ -10361,10 +10928,15 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
SysScanDesc parent_scan;
ScanKeyData parent_key;
HeapTuple parent_tuple;
+ bool child_is_partition = false;
catalog_relation = heap_open(ConstraintRelationId, RowExclusiveLock);
tuple_desc = RelationGetDescr(catalog_relation);
+ /* If parent_rel is a partitioned table, child_rel must be a partition */
+ if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ child_is_partition = true;
+
/* Outer loop scans through the parent's constraint definitions */
ScanKeyInit(&parent_key,
Anum_pg_constraint_conrelid,
@@ -10441,6 +11013,18 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
child_copy = heap_copytuple(child_tuple);
child_con = (Form_pg_constraint) GETSTRUCT(child_copy);
child_con->coninhcount++;
+
+ /*
+ * In the case of partitions, an inherited constraint must be
+ * inherited exactly once, since a partition cannot have multiple
+ * parents, and it is never considered local.
+ */
+ if (child_is_partition)
+ {
+ Assert(child_con->coninhcount == 1);
+ child_con->conislocal = false;
+ }
+
simple_heap_update(catalog_relation, &child_copy->t_self, child_copy);
CatalogUpdateIndexes(catalog_relation, child_copy);
heap_freetuple(child_copy);
@@ -10465,6 +11049,46 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
/*
* ALTER TABLE NO INHERIT
*
+ * Return value is the address of the relation that is no longer parent.
+ */
+static ObjectAddress
+ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
+{
+ ObjectAddress address;
+ Relation parent_rel;
+
+ if (rel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot change inheritance of a partition")));
+
+ /*
+ * AccessShareLock on the parent is probably enough, seeing that DROP
+ * TABLE doesn't lock parent tables at all. We need some lock since we'll
+ * be inspecting the parent's schema.
+ */
+ parent_rel = heap_openrv(parent, AccessShareLock);
+
+ /*
+ * We don't bother to check ownership of the parent table --- ownership of
+ * the child is presumed enough rights.
+ */
+
+ /* Off to RemoveInheritance() where most of the work happens */
+ RemoveInheritance(rel, parent_rel);
+
+ /* keep our lock on the parent relation until commit */
+ heap_close(parent_rel, NoLock);
+
+ ObjectAddressSet(address, RelationRelationId,
+ RelationGetRelid(parent_rel));
+
+ return address;
+}
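A corresponding sketch for the check above, assuming measurement_y2017 is
a hypothetical partition of measurement: a partition's sole inheritance
link is managed only through ATTACH/DETACH PARTITION, so plain NO INHERIT
is refused:

    ALTER TABLE measurement_y2017 NO INHERIT measurement;
    -- ERROR:  cannot change inheritance of a partition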
+
+/*
+ * RemoveInheritance
+ *
* Drop a parent from the child's parents. This just adjusts the attinhcount
* and attislocal of the columns and removes the pg_inherit and pg_depend
* entries.
@@ -10478,13 +11102,11 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel)
* coninhcount and conislocal for inherited constraints are adjusted in
* exactly the same way.
*
- * Return value is the address of the relation that is no longer parent.
+ * Common to ATExecDropInherit() and ATExecDetachPartition().
*/
-static ObjectAddress
-ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
+static void
+RemoveInheritance(Relation child_rel, Relation parent_rel)
{
- Relation parent_rel;
- Oid parent_oid;
Relation catalogRelation;
SysScanDesc scan;
ScanKeyData key[3];
@@ -10493,19 +11115,11 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
constraintTuple;
List *connames;
bool found = false;
- ObjectAddress address;
+ bool child_is_partition = false;
- /*
- * AccessShareLock on the parent is probably enough, seeing that DROP
- * TABLE doesn't lock parent tables at all. We need some lock since we'll
- * be inspecting the parent's schema.
- */
- parent_rel = heap_openrv(parent, AccessShareLock);
-
- /*
- * We don't bother to check ownership of the parent table --- ownership of
- * the child is presumed enough rights.
- */
+ /* If parent_rel is a partitioned table, child_rel must be a partition */
+ if (parent_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ child_is_partition = true;
/*
* Find and destroy the pg_inherits entry linking the two, or error out if
@@ -10515,7 +11129,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
ScanKeyInit(&key[0],
Anum_pg_inherits_inhrelid,
BTEqualStrategyNumber, F_OIDEQ,
- ObjectIdGetDatum(RelationGetRelid(rel)));
+ ObjectIdGetDatum(RelationGetRelid(child_rel)));
scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId,
true, NULL, 1, key);
@@ -10536,11 +11150,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
heap_close(catalogRelation, RowExclusiveLock);
if (!found)
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("relation \"%s\" is not a parent of relation \"%s\"",
- RelationGetRelationName(parent_rel),
- RelationGetRelationName(rel))));
+ {
+ if (child_is_partition)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("relation \"%s\" is not a partition of relation \"%s\"",
+ RelationGetRelationName(child_rel),
+ RelationGetRelationName(parent_rel))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("relation \"%s\" is not a parent of relation \"%s\"",
+ RelationGetRelationName(parent_rel),
+ RelationGetRelationName(child_rel))));
+ }
/*
* Search through child columns looking for ones matching parent rel
@@ -10549,7 +11172,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
ScanKeyInit(&key[0],
Anum_pg_attribute_attrelid,
BTEqualStrategyNumber, F_OIDEQ,
- ObjectIdGetDatum(RelationGetRelid(rel)));
+ ObjectIdGetDatum(RelationGetRelid(child_rel)));
scan = systable_beginscan(catalogRelation, AttributeRelidNumIndexId,
true, NULL, 1, key);
while (HeapTupleIsValid(attributeTuple = systable_getnext(scan)))
@@ -10611,7 +11234,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
ScanKeyInit(&key[0],
Anum_pg_constraint_conrelid,
BTEqualStrategyNumber, F_OIDEQ,
- ObjectIdGetDatum(RelationGetRelid(rel)));
+ ObjectIdGetDatum(RelationGetRelid(child_rel)));
scan = systable_beginscan(catalogRelation, ConstraintRelidIndexId,
true, NULL, 1, key);
@@ -10642,7 +11265,7 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
if (copy_con->coninhcount <= 0) /* shouldn't happen */
elog(ERROR, "relation %u has non-inherited constraint \"%s\"",
- RelationGetRelid(rel), NameStr(copy_con->conname));
+ RelationGetRelid(child_rel), NameStr(copy_con->conname));
copy_con->coninhcount--;
if (copy_con->coninhcount == 0)
@@ -10654,30 +11277,20 @@ ATExecDropInherit(Relation rel, RangeVar *parent, LOCKMODE lockmode)
}
}
- parent_oid = RelationGetRelid(parent_rel);
-
systable_endscan(scan);
heap_close(catalogRelation, RowExclusiveLock);
- drop_parent_dependency(RelationGetRelid(rel),
+ drop_parent_dependency(RelationGetRelid(child_rel),
RelationRelationId,
RelationGetRelid(parent_rel));
-
/*
* Post alter hook of this inherits. Since object_access_hook doesn't take
* multiple object identifiers, we relay oid of parent relation using
* auxiliary_id argument.
*/
InvokeObjectPostAlterHookArg(InheritsRelationId,
- RelationGetRelid(rel), 0,
+ RelationGetRelid(child_rel), 0,
RelationGetRelid(parent_rel), false);
-
- /* keep our lock on the parent relation until commit */
- heap_close(parent_rel, NoLock);
-
- ObjectAddressSet(address, RelationRelationId, parent_oid);
-
- return address;
}
/*
@@ -11499,7 +12112,8 @@ AlterTableNamespaceInternal(Relation rel, Oid oldNspOid, Oid nspOid,
/* Fix other dependent stuff */
if (rel->rd_rel->relkind == RELKIND_RELATION ||
- rel->rd_rel->relkind == RELKIND_MATVIEW)
+ rel->rd_rel->relkind == RELKIND_MATVIEW ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
AlterIndexNamespaces(classRel, rel, oldNspOid, nspOid, objsMoved);
AlterSeqNamespaces(classRel, rel, oldNspOid, nspOid,
@@ -11948,7 +12562,7 @@ RangeVarCallbackOwnsTable(const RangeVar *relation,
if (!relkind)
return;
if (relkind != RELKIND_RELATION && relkind != RELKIND_TOASTVALUE &&
- relkind != RELKIND_MATVIEW)
+ relkind != RELKIND_MATVIEW && relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table or materialized view", relation->relname)));
@@ -12105,7 +12719,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
relkind != RELKIND_VIEW &&
relkind != RELKIND_MATVIEW &&
relkind != RELKIND_SEQUENCE &&
- relkind != RELKIND_FOREIGN_TABLE)
+ relkind != RELKIND_FOREIGN_TABLE &&
+ relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, sequence, or foreign table",
@@ -12113,3 +12728,701 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
ReleaseSysCache(tuple);
}
+
+/*
+ * Transform any expressions present in the partition key
+ */
+static PartitionSpec *
+transformPartitionSpec(Relation rel, PartitionSpec *partspec, char *strategy)
+{
+ PartitionSpec *newspec;
+ ParseState *pstate;
+ RangeTblEntry *rte;
+ ListCell *l;
+
+ newspec = makeNode(PartitionSpec);
+
+ newspec->strategy = partspec->strategy;
+ newspec->location = partspec->location;
+ newspec->partParams = NIL;
+
+ /* Parse partitioning strategy name */
+ if (!pg_strcasecmp(partspec->strategy, "list"))
+ *strategy = PARTITION_STRATEGY_LIST;
+ else if (!pg_strcasecmp(partspec->strategy, "range"))
+ *strategy = PARTITION_STRATEGY_RANGE;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized partitioning strategy \"%s\"",
+ partspec->strategy)));
+
+ /*
+ * Create a dummy ParseState and insert the target relation as its sole
+ * rangetable entry. We need a ParseState for transformExpr.
+ */
+ pstate = make_parsestate(NULL);
+ rte = addRangeTableEntryForRelation(pstate, rel, NULL, false, true);
+ addRTEtoQuery(pstate, rte, true, true, true);
+
+ /* take care of any partition expressions */
+ foreach(l, partspec->partParams)
+ {
+ ListCell *lc;
+ PartitionElem *pelem = (PartitionElem *) lfirst(l);
+
+ /* Check for PARTITION BY ... (foo, foo) */
+ foreach(lc, newspec->partParams)
+ {
+ PartitionElem *pparam = (PartitionElem *) lfirst(lc);
+
+ if (pelem->name && pparam->name &&
+ !strcmp(pelem->name, pparam->name))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_COLUMN),
+ errmsg("column \"%s\" appears more than once in partition key",
+ pelem->name),
+ parser_errposition(pstate, pelem->location)));
+ }
+
+ if (pelem->expr)
+ {
+ /* Now do parse transformation of the expression */
+ pelem->expr = transformExpr(pstate, pelem->expr,
+ EXPR_KIND_PARTITION_EXPRESSION);
+
+ /* we have to fix its collations too */
+ assign_expr_collations(pstate, pelem->expr);
+ }
+
+ newspec->partParams = lappend(newspec->partParams, pelem);
+ }
+
+ return newspec;
+}
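A sketch of the duplicate-column check above (hypothetical names,
illustration only):

    CREATE TABLE t (a int, b int) PARTITION BY RANGE (a, a);
    -- ERROR:  column "a" appears more than once in partition key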
+
+/*
+ * Compute per-partition-column information from a list of PartitionElem's
+ */
+static void
+ComputePartitionAttrs(Relation rel, List *partParams, AttrNumber *partattrs,
+ List **partexprs, Oid *partopclass, Oid *partcollation)
+{
+ int attn;
+ ListCell *lc;
+
+ attn = 0;
+ foreach(lc, partParams)
+ {
+ PartitionElem *pelem = (PartitionElem *) lfirst(lc);
+ Oid atttype;
+ Oid attcollation;
+
+ if (pelem->name != NULL)
+ {
+ /* Simple attribute reference */
+ HeapTuple atttuple;
+ Form_pg_attribute attform;
+
+ atttuple = SearchSysCacheAttName(RelationGetRelid(rel), pelem->name);
+ if (!HeapTupleIsValid(atttuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" named in partition key does not exist",
+ pelem->name)));
+ attform = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+ if (attform->attnum <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("cannot use system column \"%s\" in partition key",
+ pelem->name)));
+
+ partattrs[attn] = attform->attnum;
+ atttype = attform->atttypid;
+ attcollation = attform->attcollation;
+ ReleaseSysCache(atttuple);
+
+ /* Note that whole-row references can't happen here; see below */
+ }
+ else
+ {
+ /* Expression */
+ Node *expr = pelem->expr;
+
+ Assert(expr != NULL);
+ atttype = exprType(expr);
+ attcollation = exprCollation(expr);
+
+ /*
+ * Strip any top-level COLLATE clause. This ensures that we treat
+ * "x COLLATE y" and "(x COLLATE y)" alike.
+ */
+ while (IsA(expr, CollateExpr))
+ expr = (Node *) ((CollateExpr *) expr)->arg;
+
+ if (IsA(expr, Var) &&
+ ((Var *) expr)->varattno != InvalidAttrNumber)
+ {
+ /*
+ * User wrote "(column)" or "(column COLLATE something)".
+ * Treat it like simple attribute anyway.
+ */
+ partattrs[attn] = ((Var *) expr)->varattno;
+ }
+ else
+ {
+ Bitmapset *expr_attrs = NULL;
+
+ partattrs[attn] = 0; /* marks the column as expression */
+ *partexprs = lappend(*partexprs, expr);
+
+ /*
+ * Note that expression_planner does not change the passed-in
+ * expression destructively, and we have already saved the
+ * expression to be stored into the catalog above.
+ */
+ expr = (Node *) expression_planner((Expr *) expr);
+
+ /*
+ * Partition expression cannot contain mutable functions,
+ * because a given row must always map to the same partition
+ * as long as there is no change in the partition boundary
+ * structure.
+ */
+ if (contain_mutable_functions(expr))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("functions in partition key expression must be marked IMMUTABLE")));
+
+ /*
+ * While it is not exactly *wrong* for an expression to be
+ * a constant value, it seems better to prevent such input.
+ */
+ if (IsA(expr, Const))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot use constant expression as partition key")));
+
+ /*
+ * transformPartitionSpec() should have already rejected subqueries,
+ * aggregates, window functions, and SRFs, based on the EXPR_KIND_
+ * for partition expressions.
+ */
+
+ /* Cannot have expressions containing whole-row references */
+ pull_varattnos(expr, 1, &expr_attrs);
+ if (bms_is_member(0 - FirstLowInvalidHeapAttributeNumber,
+ expr_attrs))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("partition key expressions cannot contain whole-row references")));
+ }
+ }
+
+ /*
+ * Apply collation override if any
+ */
+ if (pelem->collation)
+ attcollation = get_collation_oid(pelem->collation, false);
+
+ /*
+ * Check we have a collation iff it's a collatable type. The only
+ * expected failures here are (1) COLLATE applied to a noncollatable
+ * type, or (2) partition expression had an unresolved collation.
+ * But we might as well code this to be a complete consistency check.
+ */
+ if (type_is_collatable(atttype))
+ {
+ if (!OidIsValid(attcollation))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for partition expression"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+ else
+ {
+ if (OidIsValid(attcollation))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("collations are not supported by type %s",
+ format_type_be(atttype))));
+ }
+
+ partcollation[attn] = attcollation;
+
+ /*
+ * Identify a btree opclass to use. Currently, we use only btree
+ * operators, which seems enough for list and range partitioning.
+ */
+ if (!pelem->opclass)
+ {
+ partopclass[attn] = GetDefaultOpClass(atttype, BTREE_AM_OID);
+
+ if (!OidIsValid(partopclass[attn]))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("data type %s has no default btree operator class",
+ format_type_be(atttype)),
+ errhint("You must specify a btree operator class or define a default btree operator class for the data type.")));
+ }
+ else
+ partopclass[attn] = ResolveOpClass(pelem->opclass,
+ atttype,
+ "btree",
+ BTREE_AM_OID);
+
+ attn++;
+ }
+}
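A sketch of the expression-key checks above (hypothetical names; note
that date_trunc() on timestamptz is only STABLE, because its result
depends on the TimeZone setting):

    CREATE TABLE events (ts timestamptz)
        PARTITION BY RANGE ((date_trunc('day', ts)));
    -- ERROR:  functions in partition key expression must be marked IMMUTABLE

    CREATE TABLE t2 (a int) PARTITION BY RANGE ((1));
    -- ERROR:  cannot use constant expression as partition key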
+
+/*
+ * ALTER TABLE ATTACH PARTITION FOR VALUES
+ *
+ * Return the address of the newly attached partition.
+ */
+static ObjectAddress
+ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd)
+{
+ PartitionKey key = RelationGetPartitionKey(rel);
+ Relation attachRel,
+ catalog;
+ List *childrels;
+ TupleConstr *attachRel_constr;
+ List *partConstraint,
+ *existConstraint;
+ SysScanDesc scan;
+ ScanKeyData skey;
+ HeapTuple tuple;
+ AttrNumber attno;
+ int natts;
+ TupleDesc tupleDesc;
+ bool skip_validate = false;
+ ObjectAddress address;
+
+ attachRel = heap_openrv(cmd->name, AccessExclusiveLock);
+
+ /*
+ * Must be owner of both parent and source table -- parent was checked by
+ * ATSimplePermissions call in ATPrepCmd
+ */
+ ATSimplePermissions(attachRel, ATT_TABLE | ATT_FOREIGN_TABLE);
+
+ /* A partition can only have one parent */
+ if (attachRel->rd_rel->relispartition)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is already a partition",
+ RelationGetRelationName(attachRel))));
+
+ if (attachRel->rd_rel->reloftype)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach a typed table as partition")));
+
+ /*
+ * Table being attached should not already be part of inheritance; either
+ * as a child table...
+ */
+ catalog = heap_open(InheritsRelationId, AccessShareLock);
+ ScanKeyInit(&skey,
+ Anum_pg_inherits_inhrelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(attachRel)));
+ scan = systable_beginscan(catalog, InheritsRelidSeqnoIndexId, true,
+ NULL, 1, &skey);
+ if (HeapTupleIsValid(systable_getnext(scan)))
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach inheritance child as partition")));
+ systable_endscan(scan);
+
+ /* ...or as a parent table (except the case when it is partitioned) */
+ ScanKeyInit(&skey,
+ Anum_pg_inherits_inhparent,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(attachRel)));
+ scan = systable_beginscan(catalog, InheritsParentIndexId, true, NULL,
+ 1, &skey);
+ if (HeapTupleIsValid(systable_getnext(scan)) &&
+ attachRel->rd_rel->relkind == RELKIND_RELATION)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach inheritance parent as partition")));
+ systable_endscan(scan);
+ heap_close(catalog, AccessShareLock);
+
+ /*
+ * Prevent circularity by seeing if rel is a partition of attachRel.
+ * (In particular, this disallows making a rel a partition of itself.)
+ */
+ childrels = find_all_inheritors(RelationGetRelid(attachRel),
+ AccessShareLock, NULL);
+ if (list_member_oid(childrels, RelationGetRelid(rel)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_TABLE),
+ errmsg("circular inheritance not allowed"),
+ errdetail("\"%s\" is already a child of \"%s\".",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(attachRel))));
+
+ /* Temp parent cannot have a partition that is itself not a temp */
+ if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ attachRel->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach a permanent relation as partition of temporary relation \"%s\"",
+ RelationGetRelationName(rel))));
+
+ /* If the parent is temp, it must belong to this session */
+ if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ !rel->rd_islocaltemp)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach as partition of temporary relation of another session")));
+
+ /* Ditto for the partition */
+ if (attachRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
+ !attachRel->rd_islocaltemp)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach temporary relation of another session as partition")));
+
+ /* If parent has OIDs then child must have OIDs */
+ if (rel->rd_rel->relhasoids && !attachRel->rd_rel->relhasoids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach table \"%s\" without OIDs as partition of"
+ " table \"%s\" with OIDs", RelationGetRelationName(attachRel),
+ RelationGetRelationName(rel))));
+
+ /* OTOH, if parent doesn't have them, do not allow in attachRel either */
+ if (attachRel->rd_rel->relhasoids && !rel->rd_rel->relhasoids)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot attach table \"%s\" with OIDs as partition of table"
+ " \"%s\" without OIDs", RelationGetRelationName(attachRel),
+ RelationGetRelationName(rel))));
+
+ /* Check if there are any columns in attachRel that aren't in the parent */
+ tupleDesc = RelationGetDescr(attachRel);
+ natts = tupleDesc->natts;
+ for (attno = 1; attno <= natts; attno++)
+ {
+ Form_pg_attribute attribute = tupleDesc->attrs[attno - 1];
+ char *attributeName = NameStr(attribute->attname);
+
+ /* Ignore dropped */
+ if (attribute->attisdropped)
+ continue;
+
+ /* Find same column in parent (matching on column name). */
+ tuple = SearchSysCacheCopyAttName(RelationGetRelid(rel), attributeName);
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("table \"%s\" contains column \"%s\" not found in parent \"%s\"",
+ RelationGetRelationName(attachRel), attributeName,
+ RelationGetRelationName(rel)),
+ errdetail("New partition should contain only the columns present in parent.")));
+ }
+
+ /* OK to create inheritance. Rest of the checks performed there */
+ CreateInheritance(attachRel, rel);
+
+ /*
+ * Check that the new partition's bound is valid and does not overlap
+ * any existing partition of the parent; note that the function raises
+ * an error instead of returning if the check fails.
+ */
+ check_new_partition_bound(RelationGetRelationName(attachRel), rel,
+ cmd->bound);
+
+ /* Update the pg_class entry. */
+ StorePartitionBound(attachRel, cmd->bound);
+
+ /*
+ * Generate partition constraint from the partition bound specification.
+ * If the parent itself is a partition, make sure to include its
+ * constraint as well.
+ */
+ partConstraint = list_concat(get_qual_from_partbound(attachRel, rel,
+ cmd->bound),
+ RelationGetPartitionQual(rel, true));
+ partConstraint = (List *) eval_const_expressions(NULL,
+ (Node *) partConstraint);
+ partConstraint = (List *) canonicalize_qual((Expr *) partConstraint);
+ partConstraint = list_make1(make_ands_explicit(partConstraint));
+
+ /*
+ * Check if we can do away with having to scan the table being attached
+ * to validate the partition constraint, by *proving* that the existing
+ * constraints of the table *imply* the partition predicate. We include
+ * the table's check constraints and NOT NULL constraints in the list of
+ * clauses passed to predicate_implied_by().
+ *
+ * There is one case, explained further below, in which we cannot rely
+ * on just the result of the proof.
+ */
+ tupleDesc = RelationGetDescr(attachRel);
+ attachRel_constr = tupleDesc->constr;
+ existConstraint = NIL;
+ if (attachRel_constr != NULL)
+ {
+ int num_check = attachRel_constr->num_check;
+ int i;
+ Bitmapset *not_null_attrs = NULL;
+ List *part_constr;
+ ListCell *lc;
+ bool partition_accepts_null = true;
+ int partnatts;
+
+ if (attachRel_constr->has_not_null)
+ {
+ int natts = attachRel->rd_att->natts;
+
+ for (i = 1; i <= natts; i++)
+ {
+ Form_pg_attribute att = attachRel->rd_att->attrs[i - 1];
+
+ if (att->attnotnull && !att->attisdropped)
+ {
+ NullTest *ntest = makeNode(NullTest);
+
+ ntest->arg = (Expr *) makeVar(1,
+ i,
+ att->atttypid,
+ att->atttypmod,
+ att->attcollation,
+ 0);
+ ntest->nulltesttype = IS_NOT_NULL;
+
+ /*
+ * argisrow=false is correct even for a composite column,
+ * because attnotnull does not represent a SQL-spec IS NOT
+ * NULL test in such a case, just IS DISTINCT FROM NULL.
+ */
+ ntest->argisrow = false;
+ ntest->location = -1;
+ existConstraint = lappend(existConstraint, ntest);
+ not_null_attrs = bms_add_member(not_null_attrs, i);
+ }
+ }
+ }
+
+ for (i = 0; i < num_check; i++)
+ {
+ Node *cexpr;
+
+ /*
+ * If this constraint hasn't been fully validated yet, we must
+ * ignore it here.
+ */
+ if (!attachRel_constr->check[i].ccvalid)
+ continue;
+
+ cexpr = stringToNode(attachRel_constr->check[i].ccbin);
+
+ /*
+ * Run each expression through const-simplification and
+ * canonicalization. It is necessary, because we will be
+ * comparing it to similarly-processed qual clauses, and may fail
+ * to detect valid matches without this.
+ */
+ cexpr = eval_const_expressions(NULL, cexpr);
+ cexpr = (Node *) canonicalize_qual((Expr *) cexpr);
+
+ existConstraint = list_concat(existConstraint,
+ make_ands_implicit((Expr *) cexpr));
+ }
+
+ existConstraint = list_make1(make_ands_explicit(existConstraint));
+
+ /* And away we go ... */
+ if (predicate_implied_by(partConstraint, existConstraint))
+ skip_validate = true;
+
+ /*
+ * We choose to err on the safer side, ie, give up on skipping the
+ * validation scan, if the partition key column doesn't have the
+ * NOT NULL constraint and the table is to become a list partition
+ * that does not accept nulls. In this case, the partition predicate
+ * (partConstraint) does include a 'key IS NOT NULL' expression.
+ * However, because of the way predicate_implied_by_simple_clause()
+ * is designed to handle IS NOT NULL predicates in the absence of an
+ * IS NOT NULL clause, we cannot rely on just the above proof.
+ *
+ * That is not an issue in the case of a range partition, because if
+ * there were no NOT NULL constraint defined on the key columns, an
+ * error would be thrown before we get here anyway. That is not true,
+ * however, if any of the partition keys is an expression, which is
+ * handled below.
+ */
+ part_constr = linitial(partConstraint);
+ part_constr = make_ands_implicit((Expr *) part_constr);
+
+ /*
+ * part_constr contains an IS NOT NULL expression, if this is a list
+ * partition that does not accept nulls (in fact, also if this is a
+ * range partition and some partition key is an expression, but we
+ * never skip validation in that case anyway; see below)
+ */
+ foreach(lc, part_constr)
+ {
+ Node *expr = lfirst(lc);
+
+ if (IsA(expr, NullTest) &&
+ ((NullTest *) expr)->nulltesttype == IS_NOT_NULL)
+ {
+ partition_accepts_null = false;
+ break;
+ }
+ }
+
+ partnatts = get_partition_natts(key);
+ for (i = 0; i < partnatts; i++)
+ {
+ AttrNumber partattno;
+
+ partattno = get_partition_col_attnum(key, i);
+
+ /* If partition key is an expression, must not skip validation */
+ if (!partition_accepts_null &&
+ (partattno == 0 ||
+ !bms_is_member(partattno, not_null_attrs)))
+ skip_validate = false;
+ }
+ }
+
+ if (skip_validate)
+ elog(NOTICE, "skipping scan to validate partition constraint");
+
+ /*
+ * Set up the table to be scanned to validate the partition
+ * constraint (see partConstraint above). If it's a partitioned table,
+ * we schedule its leaf partitions to be scanned instead.
+ */
+ if (!skip_validate)
+ {
+ List *all_parts;
+ ListCell *lc;
+
+ /* Take an exclusive lock on the partitions to be checked */
+ if (attachRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ all_parts = find_all_inheritors(RelationGetRelid(attachRel),
+ AccessExclusiveLock, NULL);
+ else
+ all_parts = list_make1_oid(RelationGetRelid(attachRel));
+
+ foreach(lc, all_parts)
+ {
+ AlteredTableInfo *tab;
+ Oid part_relid = lfirst_oid(lc);
+ Relation part_rel;
+ Expr *constr;
+
+ /* Lock already taken */
+ if (part_relid != RelationGetRelid(attachRel))
+ part_rel = heap_open(part_relid, NoLock);
+ else
+ part_rel = attachRel;
+
+ /*
+ * Skip if it's a partitioned table. Only RELKIND_RELATION
+ * relations (ie, leaf partitions) need to be scanned.
+ */
+ if (part_rel != attachRel &&
+ part_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ heap_close(part_rel, NoLock);
+ continue;
+ }
+
+ /* Grab a work queue entry */
+ tab = ATGetQueueEntry(wqueue, part_rel);
+
+ constr = linitial(partConstraint);
+ tab->partition_constraint = make_ands_implicit((Expr *) constr);
+
+ /* keep our lock until commit */
+ if (part_rel != attachRel)
+ heap_close(part_rel, NoLock);
+ }
+ }
+
+ /*
+ * Invalidate the relcache so that the new partition is now included
+ * in rel's partition descriptor.
+ */
+ CacheInvalidateRelcache(rel);
+
+ ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachRel));
+
+ /* keep our lock until commit */
+ heap_close(attachRel, NoLock);
+
+ return address;
+}
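Continuing the hypothetical sketch, the predicate_implied_by() path above
lets ATTACH PARTITION skip the validation scan when an equivalent CHECK
constraint already exists on the table being attached:

    CREATE TABLE measurement_y2017 (logdate date NOT NULL,
        CHECK (logdate >= DATE '2017-01-01' AND logdate < DATE '2018-01-01'));
    ALTER TABLE measurement ATTACH PARTITION measurement_y2017
        FOR VALUES FROM ('2017-01-01') TO ('2018-01-01');
    -- NOTICE:  skipping scan to validate partition constraint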
+
+/*
+ * ALTER TABLE DETACH PARTITION
+ *
+ * Return the address of the relation that is no longer a partition of rel.
+ */
+static ObjectAddress
+ATExecDetachPartition(Relation rel, RangeVar *name)
+{
+ Relation partRel,
+ classRel;
+ HeapTuple tuple,
+ newtuple;
+ Datum new_val[Natts_pg_class];
+ bool isnull,
+ new_null[Natts_pg_class],
+ new_repl[Natts_pg_class];
+ ObjectAddress address;
+
+ partRel = heap_openrv(name, AccessShareLock);
+
+ /* All inheritance related checks are performed within the function */
+ RemoveInheritance(partRel, rel);
+
+ /* Update pg_class tuple */
+ classRel = heap_open(RelationRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(RELOID,
+ ObjectIdGetDatum(RelationGetRelid(partRel)));
+ Assert(((Form_pg_class) GETSTRUCT(tuple))->relispartition);
+
+ (void) SysCacheGetAttr(RELOID, tuple, Anum_pg_class_relpartbound,
+ &isnull);
+ Assert(!isnull);
+
+ /* Clear relpartbound and reset relispartition */
+ memset(new_val, 0, sizeof(new_val));
+ memset(new_null, false, sizeof(new_null));
+ memset(new_repl, false, sizeof(new_repl));
+ new_val[Anum_pg_class_relpartbound - 1] = (Datum) 0;
+ new_null[Anum_pg_class_relpartbound - 1] = true;
+ new_repl[Anum_pg_class_relpartbound - 1] = true;
+ newtuple = heap_modify_tuple(tuple, RelationGetDescr(classRel),
+ new_val, new_null, new_repl);
+
+ ((Form_pg_class) GETSTRUCT(newtuple))->relispartition = false;
+ simple_heap_update(classRel, &newtuple->t_self, newtuple);
+ CatalogUpdateIndexes(classRel, newtuple);
+ heap_freetuple(newtuple);
+ heap_close(classRel, RowExclusiveLock);
+
+ /*
+ * Invalidate the relcache so that the partition is no longer included
+ * in our partition descriptor.
+ */
+ CacheInvalidateRelcache(rel);
+
+ ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
+
+ /* keep our lock until commit */
+ heap_close(partRel, NoLock);
+
+ return address;
+}
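And the reverse operation, again on the sketch schema; the detached table
keeps its rows and becomes an ordinary table, since only relpartbound,
relispartition, and the pg_inherits link are cleared:

    ALTER TABLE measurement DETACH PARTITION measurement_y2017;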
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 1c264b7736..02e9693f28 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -176,7 +176,8 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
* Triggers must be on tables or views, and there are additional
* relation-type-specific restrictions.
*/
- if (rel->rd_rel->relkind == RELKIND_RELATION)
+ if (rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/* Tables can't have INSTEAD OF triggers */
if (stmt->timing != TRIGGER_TYPE_BEFORE &&
@@ -186,6 +187,13 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
errmsg("\"%s\" is a table",
RelationGetRelationName(rel)),
errdetail("Tables cannot have INSTEAD OF triggers.")));
+ /* Disallow ROW triggers on partitioned tables */
+ if (stmt->row && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is a partitioned table",
+ RelationGetRelationName(rel)),
+ errdetail("Partitioned tables cannot have ROW triggers.")));
}
else if (rel->rd_rel->relkind == RELKIND_VIEW)
{
@@ -1211,7 +1219,8 @@ RemoveTriggerById(Oid trigOid)
if (rel->rd_rel->relkind != RELKIND_RELATION &&
rel->rd_rel->relkind != RELKIND_VIEW &&
- rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, or foreign table",
@@ -1316,7 +1325,8 @@ RangeVarCallbackForRenameTrigger(const RangeVar *rv, Oid relid, Oid oldrelid,
/* only tables and views can have triggers */
if (form->relkind != RELKIND_RELATION && form->relkind != RELKIND_VIEW &&
- form->relkind != RELKIND_FOREIGN_TABLE)
+ form->relkind != RELKIND_FOREIGN_TABLE &&
+ form->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, or foreign table",
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 056933a584..5e3989acd2 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -2107,7 +2107,8 @@ DefineCompositeType(RangeVar *typevar, List *coldeflist)
/*
* Finally create the relation. This also creates the type.
*/
- DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address);
+ DefineRelation(createStmt, RELKIND_COMPOSITE_TYPE, InvalidOid, &address,
+ NULL);
return address;
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 58bbf5548b..b1be2f7ad5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1314,7 +1314,8 @@ vacuum_rel(Oid relid, RangeVar *relation, int options, VacuumParams *params)
*/
if (onerel->rd_rel->relkind != RELKIND_RELATION &&
onerel->rd_rel->relkind != RELKIND_MATVIEW &&
- onerel->rd_rel->relkind != RELKIND_TOASTVALUE)
+ onerel->rd_rel->relkind != RELKIND_TOASTVALUE &&
+ onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
{
ereport(WARNING,
(errmsg("skipping \"%s\" --- cannot vacuum non-tables or special system tables",
diff --git a/src/backend/commands/view.c b/src/backend/commands/view.c
index 325a81096f..c6b0e4f2b3 100644
--- a/src/backend/commands/view.c
+++ b/src/backend/commands/view.c
@@ -228,7 +228,8 @@ DefineVirtualRelation(RangeVar *relation, List *tlist, bool replace,
* existing view, so we don't need more code to complain if "replace"
* is false).
*/
- address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL);
+ address = DefineRelation(createStmt, RELKIND_VIEW, InvalidOid, NULL,
+ NULL);
Assert(address.objectId != InvalidOid);
return address;
}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 71c07288a1..0f47c7e010 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -42,6 +42,7 @@
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
+#include "catalog/partition.h"
#include "commands/matview.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
@@ -825,6 +826,7 @@ InitPlan(QueryDesc *queryDesc, int eflags)
InitResultRelInfo(resultRelInfo,
resultRelation,
resultRelationIndex,
+ true,
estate->es_instrument);
resultRelInfo++;
}
@@ -1019,6 +1021,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation)
switch (resultRel->rd_rel->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
/* OK */
break;
case RELKIND_SEQUENCE:
@@ -1152,6 +1155,7 @@ CheckValidRowMarkRel(Relation rel, RowMarkType markType)
switch (rel->rd_rel->relkind)
{
case RELKIND_RELATION:
+ case RELKIND_PARTITIONED_TABLE:
/* OK */
break;
case RELKIND_SEQUENCE:
@@ -1212,6 +1216,7 @@ void
InitResultRelInfo(ResultRelInfo *resultRelInfo,
Relation resultRelationDesc,
Index resultRelationIndex,
+ bool load_partition_check,
int instrument_options)
{
MemSet(resultRelInfo, 0, sizeof(ResultRelInfo));
@@ -1249,6 +1254,10 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_ConstraintExprs = NULL;
resultRelInfo->ri_junkFilter = NULL;
resultRelInfo->ri_projectReturning = NULL;
+ if (load_partition_check)
+ resultRelInfo->ri_PartitionCheck =
+ RelationGetPartitionQual(resultRelationDesc,
+ true);
}
/*
@@ -1311,6 +1320,7 @@ ExecGetTriggerResultRel(EState *estate, Oid relid)
InitResultRelInfo(rInfo,
rel,
0, /* dummy rangetable index */
+ true,
estate->es_instrument);
estate->es_trig_target_relations =
lappend(estate->es_trig_target_relations, rInfo);
@@ -1691,6 +1701,46 @@ ExecRelCheck(ResultRelInfo *resultRelInfo,
return NULL;
}
+/*
+ * ExecPartitionCheck --- check that tuple meets the partition constraint.
+ *
+ * Note: This is called *iff* resultRelInfo is the main target table.
+ */
+static bool
+ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
+ EState *estate)
+{
+ ExprContext *econtext;
+
+ /*
+ * If first time through, build expression state tree for the partition
+ * check expression. Keep it in the per-query memory context so it
+ * will survive throughout the query.
+ */
+ if (resultRelInfo->ri_PartitionCheckExpr == NULL)
+ {
+ List *qual = resultRelInfo->ri_PartitionCheck;
+
+ resultRelInfo->ri_PartitionCheckExpr = (List *)
+ ExecPrepareExpr((Expr *) qual, estate);
+ }
+
+ /*
+ * We will use the EState's per-tuple context for evaluating constraint
+ * expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /*
+ * As in the case of the catalogued constraints, we treat a NULL
+ * result as success here, not a failure.
+ */
+ return ExecQual(resultRelInfo->ri_PartitionCheckExpr, econtext, true);
+}
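A sketch of what ExecPartitionCheck() rejects, assuming the hypothetical
measurement_y2017 partition is attached for 2017 dates: a direct insert
into a partition must satisfy the implicit partition constraint:

    INSERT INTO measurement_y2017 VALUES (DATE '2016-12-31');
    -- ERROR:  new row for relation "measurement_y2017" violates partition constraint
    -- DETAIL:  Failing row contains (2016-12-31).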
+
void
ExecConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate)
@@ -1702,9 +1752,9 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
Bitmapset *insertedCols;
Bitmapset *updatedCols;
- Assert(constr);
+ Assert(constr || resultRelInfo->ri_PartitionCheck);
- if (constr->has_not_null)
+ if (constr && constr->has_not_null)
{
int natts = tupdesc->natts;
int attrChk;
@@ -1735,7 +1785,7 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
}
}
- if (constr->num_check > 0)
+ if (constr && constr->num_check > 0)
{
const char *failed;
@@ -1759,6 +1809,26 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
errtableconstraint(rel, failed)));
}
}
+
+ if (resultRelInfo->ri_PartitionCheck &&
+ !ExecPartitionCheck(resultRelInfo, slot, estate))
+ {
+ char *val_desc;
+
+ insertedCols = GetInsertedColumns(resultRelInfo, estate);
+ updatedCols = GetUpdatedColumns(resultRelInfo, estate);
+ modifiedCols = bms_union(insertedCols, updatedCols);
+ val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
+ slot,
+ tupdesc,
+ modifiedCols,
+ 64);
+ ereport(ERROR,
+ (errcode(ERRCODE_CHECK_VIOLATION),
+ errmsg("new row for relation \"%s\" violates partition constraint",
+ RelationGetRelationName(rel)),
+ val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
+ }
}
/*
@@ -2926,3 +2996,52 @@ EvalPlanQualEnd(EPQState *epqstate)
epqstate->planstate = NULL;
epqstate->origslot = NULL;
}
+
+/*
+ * ExecFindPartition -- Find a leaf partition in the partition tree rooted
+ * at parent, for the heap tuple contained in *slot
+ *
+ * estate must be non-NULL; we'll need it to compute any expressions in the
+ * partition key(s)
+ *
+ * If no leaf partition is found, this routine errors out with the appropriate
+ * error message, else it returns the leaf partition sequence number returned
+ * by get_partition_for_tuple() unchanged.
+ */
+int
+ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
+ TupleTableSlot *slot, EState *estate)
+{
+ int result;
+ Oid failed_at;
+ ExprContext *econtext = GetPerTupleExprContext(estate);
+
+ econtext->ecxt_scantuple = slot;
+ result = get_partition_for_tuple(pd, slot, estate, &failed_at);
+ if (result < 0)
+ {
+ Relation rel = resultRelInfo->ri_RelationDesc;
+ char *val_desc;
+ Bitmapset *insertedCols,
+ *updatedCols,
+ *modifiedCols;
+ TupleDesc tupDesc = RelationGetDescr(rel);
+
+ insertedCols = GetInsertedColumns(resultRelInfo, estate);
+ updatedCols = GetUpdatedColumns(resultRelInfo, estate);
+ modifiedCols = bms_union(insertedCols, updatedCols);
+ val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel),
+ slot,
+ tupDesc,
+ modifiedCols,
+ 64);
+ Assert(OidIsValid(failed_at));
+ ereport(ERROR,
+ (errcode(ERRCODE_CHECK_VIOLATION),
+ errmsg("no partition of relation \"%s\" found for row",
+ get_rel_name(failed_at)),
+ val_desc ? errdetail("Failing row contains %s.", val_desc) : 0));
+ }
+
+ return result;
+}
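A sketch of the routing failure path, again on the hypothetical schema
with only the 2017 partition attached:

    INSERT INTO measurement VALUES (DATE '2020-05-01');
    -- ERROR:  no partition of relation "measurement" found for row
    -- DETAIL:  Failing row contains (2020-05-01).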
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index efb0c5e8e5..c0b58d1841 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -258,6 +258,7 @@ ExecInsert(ModifyTableState *mtstate,
{
HeapTuple tuple;
ResultRelInfo *resultRelInfo;
+ ResultRelInfo *saved_resultRelInfo = NULL;
Relation resultRelationDesc;
Oid newId;
List *recheckIndexes = NIL;
@@ -272,6 +273,56 @@ ExecInsert(ModifyTableState *mtstate,
* get information on the (current) result relation
*/
resultRelInfo = estate->es_result_relation_info;
+
+ /* Determine the partition to heap_insert the tuple into */
+ if (mtstate->mt_partition_dispatch_info)
+ {
+ int leaf_part_index;
+ TupleConversionMap *map;
+
+ /*
+ * Away we go ... If we end up not finding a partition after all,
+ * ExecFindPartition() does not return; it errors out instead.
+ * Otherwise, the returned value is to be used as an index into
+ * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
+ * will get us the ResultRelInfo and TupleConversionMap for the
+ * partition, respectively.
+ */
+ leaf_part_index = ExecFindPartition(resultRelInfo,
+ mtstate->mt_partition_dispatch_info,
+ slot,
+ estate);
+ Assert(leaf_part_index >= 0 &&
+ leaf_part_index < mtstate->mt_num_partitions);
+
+ /*
+ * Save the old ResultRelInfo and switch to the one corresponding to
+ * the selected partition.
+ */
+ saved_resultRelInfo = resultRelInfo;
+ resultRelInfo = mtstate->mt_partitions + leaf_part_index;
+
+ /* We do not yet have a way to insert into a foreign partition */
+ if (resultRelInfo->ri_FdwRoutine)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot route inserted tuples to a foreign table")));
+
+ /* For ExecInsertIndexTuples() to work on the partition's indexes */
+ estate->es_result_relation_info = resultRelInfo;
+
+ /*
+ * We might need to convert from the parent rowtype to the partition
+ * rowtype.
+ */
+ map = mtstate->mt_partition_tupconv_maps[leaf_part_index];
+ if (map)
+ {
+ tuple = do_convert_tuple(tuple, map);
+ ExecStoreTuple(tuple, slot, InvalidBuffer, true);
+ }
+ }
+
resultRelationDesc = resultRelInfo->ri_RelationDesc;
/*
@@ -369,7 +420,7 @@ ExecInsert(ModifyTableState *mtstate,
/*
* Check the constraints of the tuple
*/
- if (resultRelationDesc->rd_att->constr)
+ if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck)
ExecConstraints(resultRelInfo, slot, estate);
if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
@@ -511,6 +562,12 @@ ExecInsert(ModifyTableState *mtstate,
list_free(recheckIndexes);
+ if (saved_resultRelInfo)
+ {
+ resultRelInfo = saved_resultRelInfo;
+ estate->es_result_relation_info = resultRelInfo;
+ }
+
/*
* Check any WITH CHECK OPTION constraints from parent views. We are
* required to do this after testing all constraints and uniqueness
@@ -922,7 +979,7 @@ lreplace:;
/*
* Check the constraints of the tuple
*/
- if (resultRelationDesc->rd_att->constr)
+ if (resultRelationDesc->rd_att->constr || resultRelInfo->ri_PartitionCheck)
ExecConstraints(resultRelInfo, slot, estate);
/*
@@ -1565,6 +1622,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
Plan *subplan;
ListCell *l;
int i;
+ Relation rel;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -1655,6 +1713,75 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
estate->es_result_relation_info = saved_resultRelInfo;
+ /* Build state for INSERT tuple routing */
+ rel = mtstate->resultRelInfo->ri_RelationDesc;
+ if (operation == CMD_INSERT &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ PartitionDispatch *pd;
+ int i,
+ j,
+ num_parted,
+ num_leaf_parts;
+ List *leaf_parts;
+ ListCell *cell;
+ ResultRelInfo *leaf_part_rri;
+
+ /* Form the partition node tree and lock partitions */
+ pd = RelationGetPartitionDispatchInfo(rel, RowExclusiveLock,
+ &num_parted, &leaf_parts);
+ mtstate->mt_partition_dispatch_info = pd;
+ mtstate->mt_num_dispatch = num_parted;
+ num_leaf_parts = list_length(leaf_parts);
+ mtstate->mt_num_partitions = num_leaf_parts;
+ mtstate->mt_partitions = (ResultRelInfo *)
+ palloc0(num_leaf_parts * sizeof(ResultRelInfo));
+ mtstate->mt_partition_tupconv_maps = (TupleConversionMap **)
+ palloc0(num_leaf_parts * sizeof(TupleConversionMap *));
+
+ leaf_part_rri = mtstate->mt_partitions;
+ i = j = 0;
+ foreach(cell, leaf_parts)
+ {
+ Oid partrelid = lfirst_oid(cell);
+ Relation partrel;
+
+ /*
+ * We locked all the partitions above including the leaf
+ * partitions. Note that each of the relations in
+ * mtstate->mt_partitions will be closed by ExecEndModifyTable().
+ */
+ partrel = heap_open(partrelid, NoLock);
+
+ /*
+ * Verify result relation is a valid target for the current
+ * operation
+ */
+ CheckValidResultRel(partrel, CMD_INSERT);
+
+ InitResultRelInfo(leaf_part_rri,
+ partrel,
+ 1, /* dummy */
+ false, /* no partition constraint checks */
+ estate->es_instrument);
+
+ /* Open partition indices (note: ON CONFLICT unsupported) */
+ if (partrel->rd_rel->relhasindex && operation != CMD_DELETE &&
+ leaf_part_rri->ri_IndexRelationDescs == NULL)
+ ExecOpenIndices(leaf_part_rri, false);
+
+ if (!equalTupleDescs(RelationGetDescr(rel),
+ RelationGetDescr(partrel)))
+ mtstate->mt_partition_tupconv_maps[i] =
+ convert_tuples_by_name(RelationGetDescr(rel),
+ RelationGetDescr(partrel),
+ gettext_noop("could not convert row type"));
+
+ leaf_part_rri++;
+ i++;
+ }
+ }
+
/*
* Initialize any WITH CHECK OPTION constraints if needed.
*/
@@ -1886,7 +2013,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
if (relkind == RELKIND_RELATION ||
- relkind == RELKIND_MATVIEW)
+ relkind == RELKIND_MATVIEW ||
+ relkind == RELKIND_PARTITIONED_TABLE)
{
j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
if (!AttributeNumberIsValid(j->jf_junkAttNo))
@@ -1971,6 +2099,26 @@ ExecEndModifyTable(ModifyTableState *node)
resultRelInfo);
}
+ /*
+ * Close all the partitioned tables, leaf partitions, and their indices
+ *
+ * Remember node->mt_partition_dispatch_info[0] corresponds to the root
+ * partitioned table, which we must not try to close, because it is the
+ * main target table of the query that will be closed by ExecEndPlan().
+ */
+ for (i = 1; i < node->mt_num_dispatch; i++)
+ {
+ PartitionDispatch pd = node->mt_partition_dispatch_info[i];
+
+ heap_close(pd->reldesc, NoLock);
+ }
+ for (i = 0; i < node->mt_num_partitions; i++)
+ {
+ ResultRelInfo *resultRelInfo = node->mt_partitions + i;
+
+ ExecCloseIndices(resultRelInfo);
+ heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+ }
+
/*
* Free the exprcontext
*/
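Taken together, the executor changes above make inserts through the
parent route each row to a leaf partition, with any rowtype conversion
applied by do_convert_tuple(). On the hypothetical schema:

    INSERT INTO measurement VALUES (DATE '2017-06-15');
    -- routed to, and stored in, the leaf partition measurement_y2017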
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index dd66adb0b2..e30c57e86b 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3030,6 +3030,8 @@ CopyCreateStmtFields(const CreateStmt *from, CreateStmt *newnode)
COPY_NODE_FIELD(relation);
COPY_NODE_FIELD(tableElts);
COPY_NODE_FIELD(inhRelations);
+ COPY_NODE_FIELD(partspec);
+ COPY_NODE_FIELD(partbound);
COPY_NODE_FIELD(ofTypename);
COPY_NODE_FIELD(constraints);
COPY_NODE_FIELD(options);
@@ -4188,6 +4190,70 @@ _copyAlterPolicyStmt(const AlterPolicyStmt *from)
return newnode;
}
+static PartitionSpec *
+_copyPartitionSpec(const PartitionSpec *from)
+{
+ PartitionSpec *newnode = makeNode(PartitionSpec);
+
+ COPY_STRING_FIELD(strategy);
+ COPY_NODE_FIELD(partParams);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
+static PartitionElem *
+_copyPartitionElem(const PartitionElem *from)
+{
+ PartitionElem *newnode = makeNode(PartitionElem);
+
+ COPY_STRING_FIELD(name);
+ COPY_NODE_FIELD(expr);
+ COPY_NODE_FIELD(collation);
+ COPY_NODE_FIELD(opclass);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
+static PartitionBoundSpec *
+_copyPartitionBoundSpec(const PartitionBoundSpec *from)
+{
+ PartitionBoundSpec *newnode = makeNode(PartitionBoundSpec);
+
+ COPY_SCALAR_FIELD(strategy);
+ COPY_NODE_FIELD(listdatums);
+ COPY_NODE_FIELD(lowerdatums);
+ COPY_NODE_FIELD(upperdatums);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
+static PartitionRangeDatum *
+_copyPartitionRangeDatum(const PartitionRangeDatum *from)
+{
+ PartitionRangeDatum *newnode = makeNode(PartitionRangeDatum);
+
+ COPY_SCALAR_FIELD(infinite);
+ COPY_NODE_FIELD(value);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
+static PartitionCmd *
+_copyPartitionCmd(const PartitionCmd *from)
+{
+ PartitionCmd *newnode = makeNode(PartitionCmd);
+
+ COPY_NODE_FIELD(name);
+ COPY_NODE_FIELD(bound);
+
+ return newnode;
+}
+
/* ****************************************************************
* pg_list.h copy functions
* ****************************************************************
@@ -5105,6 +5171,21 @@ copyObject(const void *from)
case T_TriggerTransition:
retval = _copyTriggerTransition(from);
break;
+ case T_PartitionSpec:
+ retval = _copyPartitionSpec(from);
+ break;
+ case T_PartitionElem:
+ retval = _copyPartitionElem(from);
+ break;
+ case T_PartitionBoundSpec:
+ retval = _copyPartitionBoundSpec(from);
+ break;
+ case T_PartitionRangeDatum:
+ retval = _copyPartitionRangeDatum(from);
+ break;
+ case T_PartitionCmd:
+ retval = _copyPartitionCmd(from);
+ break;
/*
* MISCELLANEOUS NODES
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index cad3aebecd..b7a109cfb0 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1168,6 +1168,8 @@ _equalCreateStmt(const CreateStmt *a, const CreateStmt *b)
COMPARE_NODE_FIELD(relation);
COMPARE_NODE_FIELD(tableElts);
COMPARE_NODE_FIELD(inhRelations);
+ COMPARE_NODE_FIELD(partspec);
+ COMPARE_NODE_FIELD(partbound);
COMPARE_NODE_FIELD(ofTypename);
COMPARE_NODE_FIELD(constraints);
COMPARE_NODE_FIELD(options);
@@ -2646,6 +2648,59 @@ _equalTriggerTransition(const TriggerTransition *a, const TriggerTransition *b)
return true;
}
+static bool
+_equalPartitionSpec(const PartitionSpec *a, const PartitionSpec *b)
+{
+ COMPARE_STRING_FIELD(strategy);
+ COMPARE_NODE_FIELD(partParams);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
+_equalPartitionElem(const PartitionElem *a, const PartitionElem *b)
+{
+ COMPARE_STRING_FIELD(name);
+ COMPARE_NODE_FIELD(expr);
+ COMPARE_NODE_FIELD(collation);
+ COMPARE_NODE_FIELD(opclass);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
+_equalPartitionBoundSpec(const PartitionBoundSpec *a, const PartitionBoundSpec *b)
+{
+ COMPARE_SCALAR_FIELD(strategy);
+ COMPARE_NODE_FIELD(listdatums);
+ COMPARE_NODE_FIELD(lowerdatums);
+ COMPARE_NODE_FIELD(upperdatums);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
+_equalPartitionRangeDatum(const PartitionRangeDatum *a, const PartitionRangeDatum *b)
+{
+ COMPARE_SCALAR_FIELD(infinite);
+ COMPARE_NODE_FIELD(value);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
+_equalPartitionCmd(const PartitionCmd *a, const PartitionCmd *b)
+{
+ COMPARE_NODE_FIELD(name);
+ COMPARE_NODE_FIELD(bound);
+
+ return true;
+}
+
/*
* Stuff from pg_list.h
*/
@@ -3402,6 +3457,21 @@ equal(const void *a, const void *b)
case T_TriggerTransition:
retval = _equalTriggerTransition(a, b);
break;
+ case T_PartitionSpec:
+ retval = _equalPartitionSpec(a, b);
+ break;
+ case T_PartitionElem:
+ retval = _equalPartitionElem(a, b);
+ break;
+ case T_PartitionBoundSpec:
+ retval = _equalPartitionBoundSpec(a, b);
+ break;
+ case T_PartitionRangeDatum:
+ retval = _equalPartitionRangeDatum(a, b);
+ break;
+ case T_PartitionCmd:
+ retval = _equalPartitionCmd(a, b);
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 399744193c..973fb152c1 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -1552,6 +1552,12 @@ exprLocation(const Node *expr)
/* just use nested expr's location */
loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr);
break;
+ case T_PartitionBoundSpec:
+ loc = ((const PartitionBoundSpec *) expr)->location;
+ break;
+ case T_PartitionRangeDatum:
+ loc = ((const PartitionRangeDatum *) expr)->location;
+ break;
default:
/* for any other node type it's just unknown... */
loc = -1;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 748b687929..0d858f5920 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2392,6 +2392,8 @@ _outCreateStmtInfo(StringInfo str, const CreateStmt *node)
WRITE_NODE_FIELD(relation);
WRITE_NODE_FIELD(tableElts);
WRITE_NODE_FIELD(inhRelations);
+ WRITE_NODE_FIELD(partspec);
+ WRITE_NODE_FIELD(partbound);
WRITE_NODE_FIELD(ofTypename);
WRITE_NODE_FIELD(constraints);
WRITE_NODE_FIELD(options);
@@ -3277,6 +3279,47 @@ _outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node)
appendStringInfo(str, " %u", node->conpfeqop[i]);
}
+static void
+_outPartitionSpec(StringInfo str, const PartitionSpec *node)
+{
+ WRITE_NODE_TYPE("PARTITIONBY");
+
+ WRITE_STRING_FIELD(strategy);
+ WRITE_NODE_FIELD(partParams);
+ WRITE_LOCATION_FIELD(location);
+}
+
+static void
+_outPartitionElem(StringInfo str, const PartitionElem *node)
+{
+ WRITE_NODE_TYPE("PARTITIONELEM");
+
+ WRITE_STRING_FIELD(name);
+ WRITE_NODE_FIELD(expr);
+ WRITE_NODE_FIELD(collation);
+ WRITE_NODE_FIELD(opclass);
+ WRITE_LOCATION_FIELD(location);
+}
+
+static void
+_outPartitionBoundSpec(StringInfo str, const PartitionBoundSpec *node)
+{
+ WRITE_NODE_TYPE("PARTITIONBOUND");
+
+ WRITE_CHAR_FIELD(strategy);
+ WRITE_NODE_FIELD(listdatums);
+ WRITE_NODE_FIELD(lowerdatums);
+ WRITE_NODE_FIELD(upperdatums);
+}
+
+static void
+_outPartitionRangeDatum(StringInfo str, const PartitionRangeDatum *node)
+{
+ WRITE_NODE_TYPE("PARTRANGEDATUM");
+
+ WRITE_BOOL_FIELD(infinite);
+ WRITE_NODE_FIELD(value);
+}
/*
* outNode -
@@ -3865,6 +3908,18 @@ outNode(StringInfo str, const void *obj)
case T_TriggerTransition:
_outTriggerTransition(str, obj);
break;
+ case T_PartitionSpec:
+ _outPartitionSpec(str, obj);
+ break;
+ case T_PartitionElem:
+ _outPartitionElem(str, obj);
+ break;
+ case T_PartitionBoundSpec:
+ _outPartitionBoundSpec(str, obj);
+ break;
+ case T_PartitionRangeDatum:
+ _outPartitionRangeDatum(str, obj);
+ break;
default:
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 917e6c8a65..c587d4e1d7 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2265,6 +2265,36 @@ _readExtensibleNode(void)
READ_DONE();
}
+/*
+ * _readPartitionBoundSpec
+ */
+static PartitionBoundSpec *
+_readPartitionBoundSpec(void)
+{
+ READ_LOCALS(PartitionBoundSpec);
+
+ READ_CHAR_FIELD(strategy);
+ READ_NODE_FIELD(listdatums);
+ READ_NODE_FIELD(lowerdatums);
+ READ_NODE_FIELD(upperdatums);
+
+ READ_DONE();
+}
+
+/*
+ * _readPartitionRangeDatum
+ */
+static PartitionRangeDatum *
+_readPartitionRangeDatum(void)
+{
+ READ_LOCALS(PartitionRangeDatum);
+
+ READ_BOOL_FIELD(infinite);
+ READ_NODE_FIELD(value);
+
+ READ_DONE();
+}
+
/*
* parseNodeString
*
@@ -2497,6 +2527,10 @@ parseNodeString(void)
return_value = _readAlternativeSubPlan();
else if (MATCH("EXTENSIBLENODE", 14))
return_value = _readExtensibleNode();
+ else if (MATCH("PARTITIONBOUND", 14))
+ return_value = _readPartitionBoundSpec();
+ else if (MATCH("PARTRANGEDATUM", 14))
+ return_value = _readPartitionRangeDatum();
else
{
elog(ERROR, "badly formatted node string \"%.32s\"...", token);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index bb16c59028..72272d9bb7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -27,6 +27,7 @@
#include "catalog/catalog.h"
#include "catalog/dependency.h"
#include "catalog/heap.h"
+#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
@@ -1140,6 +1141,7 @@ get_relation_constraints(PlannerInfo *root,
Index varno = rel->relid;
Relation relation;
TupleConstr *constr;
+ List *pcqual;
/*
* We assume the relation has already been safely locked.
@@ -1225,6 +1227,24 @@ get_relation_constraints(PlannerInfo *root,
}
}
+ /* Append partition predicates, if any */
+ pcqual = RelationGetPartitionQual(relation, true);
+ if (pcqual)
+ {
+ /*
+		 * Run each expression through const-simplification and
+		 * canonicalization, as is done for check constraints.
+ */
+ pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
+ pcqual = (List *) canonicalize_qual((Expr *) pcqual);
+
+ /* Fix Vars to have the desired varno */
+ if (varno != 1)
+ ChangeVarNodes((Node *) pcqual, 1, varno, 0);
+
+ result = list_concat(result, pcqual);
+ }
+
heap_close(relation, NoLock);
return result;
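
The partition constraint returned by RelationGetPartitionQual() now feeds
into planning like an ordinary CHECK constraint, so constraint exclusion can
prune partitions whose bounds contradict the query. A sketch, assuming a
hypothetical range-partitioned table "measurement" keyed on "logdate":

    SET constraint_exclusion = on;
    EXPLAIN SELECT * FROM measurement WHERE logdate >= DATE '2017-06-01';
    -- partitions whose FOR VALUES range cannot overlap the WHERE clause
    -- should be absent from the resulting plan
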
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 1a541788eb..7364346167 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -806,8 +806,16 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
/* Process ON CONFLICT, if any. */
if (stmt->onConflictClause)
+ {
+		/* Bail out if the target relation is a partitioned table */
+ if (pstate->p_target_rangetblentry->relkind == RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT clause is not supported with partitioned tables")));
+
qry->onConflict = transformOnConflictClause(pstate,
stmt->onConflictClause);
+ }
/*
* If we have a RETURNING clause, we need to add the target relation to
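
With this check, INSERT ... ON CONFLICT is rejected when the target is a
partitioned table. A sketch, assuming a hypothetical partitioned table
"measurement":

    INSERT INTO measurement VALUES (1, '2017-01-05')
        ON CONFLICT DO NOTHING;
    -- ERROR:  ON CONFLICT clause is not supported with partitioned tables
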
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 414348b95b..2ed7b5259d 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -229,6 +229,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
struct ImportQual *importqual;
InsertStmt *istmt;
VariableSetStmt *vsetstmt;
+ PartitionElem *partelem;
+ PartitionSpec *partspec;
+ PartitionRangeDatum *partrange_datum;
}
%type stmt schema_stmt
@@ -276,7 +279,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type add_drop opt_asc_desc opt_nulls_order
%type alter_table_cmd alter_type_cmd opt_collate_clause
- replica_identity
+ replica_identity partition_cmd
%type alter_table_cmds alter_type_cmds
%type opt_drop_behavior
@@ -545,6 +548,17 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
opt_frame_clause frame_extent frame_bound
%type opt_existing_window_name
%type opt_if_not_exists
+%type PartitionSpec OptPartitionSpec
+%type part_strategy
+%type part_elem
+%type part_params
+%type OptPartitionElementList PartitionElementList
+%type PartitionElement
+%type ForValues
+%type partbound_datum
+%type partbound_datum_list
+%type PartitionRangeDatum
+%type range_datum_list
/*
* Non-keyword token types. These are hard-wired into the "flex" lexer.
@@ -570,7 +584,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
/* ordinary key words in alphabetical order */
%token ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER
AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC
- ASSERTION ASSIGNMENT ASYMMETRIC AT ATTRIBUTE AUTHORIZATION
+ ASSERTION ASSIGNMENT ASYMMETRIC AT ATTACH ATTRIBUTE AUTHORIZATION
BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT
BOOLEAN_P BOTH BY
@@ -586,7 +600,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
DATA_P DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS DEPENDS DESC
- DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P DOUBLE_P DROP
+ DETACH DICTIONARY DISABLE_P DISCARD DISTINCT DO DOCUMENT_P DOMAIN_P
+ DOUBLE_P DROP
EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ENUM_P ESCAPE EVENT EXCEPT
EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN
@@ -1787,6 +1802,24 @@ AlterTableStmt:
n->missing_ok = true;
$$ = (Node *)n;
}
+ | ALTER TABLE relation_expr partition_cmd
+ {
+ AlterTableStmt *n = makeNode(AlterTableStmt);
+ n->relation = $3;
+ n->cmds = list_make1($4);
+ n->relkind = OBJECT_TABLE;
+ n->missing_ok = false;
+ $$ = (Node *)n;
+ }
+ | ALTER TABLE IF_P EXISTS relation_expr partition_cmd
+ {
+ AlterTableStmt *n = makeNode(AlterTableStmt);
+ n->relation = $5;
+ n->cmds = list_make1($6);
+ n->relkind = OBJECT_TABLE;
+ n->missing_ok = true;
+ $$ = (Node *)n;
+ }
| ALTER TABLE ALL IN_P TABLESPACE name SET TABLESPACE name opt_nowait
{
AlterTableMoveAllStmt *n =
@@ -1932,6 +1965,34 @@ alter_table_cmds:
| alter_table_cmds ',' alter_table_cmd { $$ = lappend($1, $3); }
;
+partition_cmd:
+ /* ALTER TABLE ATTACH PARTITION FOR VALUES */
+ ATTACH PARTITION qualified_name ForValues
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ PartitionCmd *cmd = makeNode(PartitionCmd);
+
+ n->subtype = AT_AttachPartition;
+ cmd->name = $3;
+ cmd->bound = (Node *) $4;
+ n->def = (Node *) cmd;
+
+ $$ = (Node *) n;
+ }
+ /* ALTER TABLE DETACH PARTITION */
+ | DETACH PARTITION qualified_name
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ PartitionCmd *cmd = makeNode(PartitionCmd);
+
+ n->subtype = AT_DetachPartition;
+ cmd->name = $3;
+ n->def = (Node *) cmd;
+
+ $$ = (Node *) n;
+ }
+ ;
+
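
The two ALTER TABLE subcommands accepted by partition_cmd, sketched with
hypothetical names "measurement" and "measurement_y2017":

    ALTER TABLE measurement
        ATTACH PARTITION measurement_y2017
        FOR VALUES FROM ('2017-01-01') TO ('2018-01-01');

    ALTER TABLE measurement DETACH PARTITION measurement_y2017;
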
alter_table_cmd:
/* ALTER TABLE ADD */
ADD_P columnDef
@@ -2467,6 +2528,73 @@ reloption_elem:
}
;
+ForValues:
+ /* a LIST partition */
+ FOR VALUES IN_P '(' partbound_datum_list ')'
+ {
+ PartitionBoundSpec *n = makeNode(PartitionBoundSpec);
+
+ n->strategy = PARTITION_STRATEGY_LIST;
+ n->listdatums = $5;
+ n->location = @3;
+
+ $$ = (Node *) n;
+ }
+
+ /* a RANGE partition */
+ | FOR VALUES FROM '(' range_datum_list ')' TO '(' range_datum_list ')'
+ {
+ PartitionBoundSpec *n = makeNode(PartitionBoundSpec);
+
+ n->strategy = PARTITION_STRATEGY_RANGE;
+ n->lowerdatums = $5;
+ n->upperdatums = $9;
+ n->location = @3;
+
+ $$ = (Node *) n;
+ }
+ ;
+
+partbound_datum:
+ Sconst { $$ = makeStringConst($1, @1); }
+ | NumericOnly { $$ = makeAConst($1, @1); }
+ | NULL_P { $$ = makeNullAConst(@1); }
+ ;
+
+partbound_datum_list:
+ partbound_datum { $$ = list_make1($1); }
+ | partbound_datum_list ',' partbound_datum
+ { $$ = lappend($1, $3); }
+ ;
+
+range_datum_list:
+ PartitionRangeDatum { $$ = list_make1($1); }
+ | range_datum_list ',' PartitionRangeDatum
+ { $$ = lappend($1, $3); }
+ ;
+
+PartitionRangeDatum:
+ UNBOUNDED
+ {
+ PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
+
+ n->infinite = true;
+ n->value = NULL;
+ n->location = @1;
+
+ $$ = n;
+ }
+ | partbound_datum
+ {
+ PartitionRangeDatum *n = makeNode(PartitionRangeDatum);
+
+ n->infinite = false;
+ n->value = $1;
+ n->location = @1;
+
+ $$ = n;
+ }
+ ;
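
Sketches of the bound syntax these productions accept, with hypothetical
values; UNBOUNDED stands in for an open end of a range:

    -- a LIST bound: an explicit set of allowed values (NULL is permitted)
    ... FOR VALUES IN ('active', 'pending', NULL)

    -- a RANGE bound: one datum per partition key column
    ... FOR VALUES FROM (1, UNBOUNDED) TO (100, UNBOUNDED)
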
/*****************************************************************************
*
@@ -2812,69 +2940,113 @@ copy_generic_opt_arg_list_item:
*****************************************************************************/
CreateStmt: CREATE OptTemp TABLE qualified_name '(' OptTableElementList ')'
- OptInherit OptWith OnCommitOption OptTableSpace
+ OptInherit OptPartitionSpec OptWith OnCommitOption OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$4->relpersistence = $2;
n->relation = $4;
n->tableElts = $6;
n->inhRelations = $8;
+ n->partspec = $9;
n->ofTypename = NULL;
n->constraints = NIL;
- n->options = $9;
- n->oncommit = $10;
- n->tablespacename = $11;
+ n->options = $10;
+ n->oncommit = $11;
+ n->tablespacename = $12;
n->if_not_exists = false;
$$ = (Node *)n;
}
| CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name '('
- OptTableElementList ')' OptInherit OptWith OnCommitOption
- OptTableSpace
+ OptTableElementList ')' OptInherit OptPartitionSpec OptWith
+ OnCommitOption OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$7->relpersistence = $2;
n->relation = $7;
n->tableElts = $9;
n->inhRelations = $11;
+ n->partspec = $12;
n->ofTypename = NULL;
n->constraints = NIL;
- n->options = $12;
- n->oncommit = $13;
- n->tablespacename = $14;
+ n->options = $13;
+ n->oncommit = $14;
+ n->tablespacename = $15;
n->if_not_exists = true;
$$ = (Node *)n;
}
| CREATE OptTemp TABLE qualified_name OF any_name
- OptTypedTableElementList OptWith OnCommitOption OptTableSpace
+ OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
+ OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$4->relpersistence = $2;
n->relation = $4;
n->tableElts = $7;
n->inhRelations = NIL;
+ n->partspec = $8;
n->ofTypename = makeTypeNameFromNameList($6);
n->ofTypename->location = @6;
n->constraints = NIL;
- n->options = $8;
- n->oncommit = $9;
- n->tablespacename = $10;
+ n->options = $9;
+ n->oncommit = $10;
+ n->tablespacename = $11;
n->if_not_exists = false;
$$ = (Node *)n;
}
| CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name OF any_name
- OptTypedTableElementList OptWith OnCommitOption OptTableSpace
+ OptTypedTableElementList OptPartitionSpec OptWith OnCommitOption
+ OptTableSpace
{
CreateStmt *n = makeNode(CreateStmt);
$7->relpersistence = $2;
n->relation = $7;
n->tableElts = $10;
n->inhRelations = NIL;
+ n->partspec = $11;
n->ofTypename = makeTypeNameFromNameList($9);
n->ofTypename->location = @9;
n->constraints = NIL;
+ n->options = $12;
+ n->oncommit = $13;
+ n->tablespacename = $14;
+ n->if_not_exists = true;
+ $$ = (Node *)n;
+ }
+ | CREATE OptTemp TABLE qualified_name PARTITION OF qualified_name
+ OptPartitionElementList ForValues OptPartitionSpec OptWith
+ OnCommitOption OptTableSpace
+ {
+ CreateStmt *n = makeNode(CreateStmt);
+ $4->relpersistence = $2;
+ n->relation = $4;
+ n->tableElts = $8;
+ n->inhRelations = list_make1($7);
+ n->partbound = (Node *) $9;
+ n->partspec = $10;
+ n->ofTypename = NULL;
+ n->constraints = NIL;
n->options = $11;
n->oncommit = $12;
n->tablespacename = $13;
+ n->if_not_exists = false;
+ $$ = (Node *)n;
+ }
+ | CREATE OptTemp TABLE IF_P NOT EXISTS qualified_name PARTITION OF
+ qualified_name OptPartitionElementList ForValues OptPartitionSpec
+ OptWith OnCommitOption OptTableSpace
+ {
+ CreateStmt *n = makeNode(CreateStmt);
+ $7->relpersistence = $2;
+ n->relation = $7;
+ n->tableElts = $11;
+ n->inhRelations = list_make1($10);
+ n->partbound = (Node *) $12;
+ n->partspec = $13;
+ n->ofTypename = NULL;
+ n->constraints = NIL;
+ n->options = $14;
+ n->oncommit = $15;
+ n->tablespacename = $16;
n->if_not_exists = true;
$$ = (Node *)n;
}
@@ -2923,6 +3095,11 @@ OptTypedTableElementList:
| /*EMPTY*/ { $$ = NIL; }
;
+OptPartitionElementList:
+ '(' PartitionElementList ')' { $$ = $2; }
+ | /*EMPTY*/ { $$ = NIL; }
+ ;
+
TableElementList:
TableElement
{
@@ -2945,6 +3122,17 @@ TypedTableElementList:
}
;
+PartitionElementList:
+ PartitionElement
+ {
+ $$ = list_make1($1);
+ }
+ | PartitionElementList ',' PartitionElement
+ {
+ $$ = lappend($1, $3);
+ }
+ ;
+
TableElement:
columnDef { $$ = $1; }
| TableLikeClause { $$ = $1; }
@@ -2956,6 +3144,28 @@ TypedTableElement:
| TableConstraint { $$ = $1; }
;
+PartitionElement:
+ TableConstraint { $$ = $1; }
+ | ColId ColQualList
+ {
+ ColumnDef *n = makeNode(ColumnDef);
+ n->colname = $1;
+ n->typeName = NULL;
+ n->inhcount = 0;
+ n->is_local = true;
+ n->is_not_null = false;
+ n->is_from_type = false;
+ n->storage = 0;
+ n->raw_default = NULL;
+ n->cooked_default = NULL;
+ n->collOid = InvalidOid;
+ SplitColQualList($2, &n->constraints, &n->collClause,
+ yyscanner);
+ n->location = @1;
+ $$ = (Node *) n;
+ }
+ ;
+
columnDef: ColId Typename create_generic_options ColQualList
{
ColumnDef *n = makeNode(ColumnDef);
@@ -3419,6 +3629,65 @@ OptInherit: INHERITS '(' qualified_name_list ')' { $$ = $3; }
| /*EMPTY*/ { $$ = NIL; }
;
+/* Optional partition key specification */
+OptPartitionSpec: PartitionSpec { $$ = $1; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
+PartitionSpec: PARTITION BY part_strategy '(' part_params ')'
+ {
+ PartitionSpec *n = makeNode(PartitionSpec);
+
+ n->strategy = $3;
+ n->partParams = $5;
+ n->location = @1;
+
+ $$ = n;
+ }
+ ;
+
+part_strategy: IDENT { $$ = $1; }
+ | unreserved_keyword { $$ = pstrdup($1); }
+ ;
+
+part_params: part_elem { $$ = list_make1($1); }
+ | part_params ',' part_elem { $$ = lappend($1, $3); }
+ ;
+
+part_elem: ColId opt_collate opt_class
+ {
+ PartitionElem *n = makeNode(PartitionElem);
+
+ n->name = $1;
+ n->expr = NULL;
+ n->collation = $2;
+ n->opclass = $3;
+ n->location = @1;
+ $$ = n;
+ }
+ | func_expr_windowless opt_collate opt_class
+ {
+ PartitionElem *n = makeNode(PartitionElem);
+
+ n->name = NULL;
+ n->expr = $1;
+ n->collation = $2;
+ n->opclass = $3;
+ n->location = @1;
+ $$ = n;
+ }
+ | '(' a_expr ')' opt_collate opt_class
+ {
+ PartitionElem *n = makeNode(PartitionElem);
+
+ n->name = NULL;
+ n->expr = $2;
+ n->collation = $4;
+ n->opclass = $5;
+ n->location = @1;
+ $$ = n;
+ }
+ ;
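
A part_elem may thus be a plain column or an expression, each optionally
followed by a collation and an operator class. A sketch with a hypothetical
table:

    CREATE TABLE words (w text)
        PARTITION BY RANGE (lower(w) COLLATE "C" text_pattern_ops);
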
/* WITH (options) is preferred, WITH OIDS and WITHOUT OIDS are legacy forms */
OptWith:
WITH reloptions { $$ = $2; }
@@ -4484,6 +4753,48 @@ CreateForeignTableStmt:
n->options = $14;
$$ = (Node *) n;
}
+ | CREATE FOREIGN TABLE qualified_name
+ PARTITION OF qualified_name OptPartitionElementList ForValues
+ SERVER name create_generic_options
+ {
+ CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt);
+ $4->relpersistence = RELPERSISTENCE_PERMANENT;
+ n->base.relation = $4;
+ n->base.inhRelations = list_make1($7);
+ n->base.tableElts = $8;
+ n->base.partbound = (Node *) $9;
+ n->base.ofTypename = NULL;
+ n->base.constraints = NIL;
+ n->base.options = NIL;
+ n->base.oncommit = ONCOMMIT_NOOP;
+ n->base.tablespacename = NULL;
+ n->base.if_not_exists = false;
+ /* FDW-specific data */
+ n->servername = $11;
+ n->options = $12;
+ $$ = (Node *) n;
+ }
+ | CREATE FOREIGN TABLE IF_P NOT EXISTS qualified_name
+ PARTITION OF qualified_name OptPartitionElementList ForValues
+ SERVER name create_generic_options
+ {
+ CreateForeignTableStmt *n = makeNode(CreateForeignTableStmt);
+ $7->relpersistence = RELPERSISTENCE_PERMANENT;
+ n->base.relation = $7;
+ n->base.inhRelations = list_make1($10);
+ n->base.tableElts = $11;
+ n->base.partbound = (Node *) $12;
+ n->base.ofTypename = NULL;
+ n->base.constraints = NIL;
+ n->base.options = NIL;
+ n->base.oncommit = ONCOMMIT_NOOP;
+ n->base.tablespacename = NULL;
+ n->base.if_not_exists = true;
+ /* FDW-specific data */
+ n->servername = $14;
+ n->options = $15;
+ $$ = (Node *) n;
+ }
;
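
The corresponding DDL allows a foreign table to be created as a partition.
A sketch, assuming a hypothetical parent "measurement" and foreign server
"remote_srv":

    CREATE FOREIGN TABLE measurement_y2016
        PARTITION OF measurement
        FOR VALUES FROM ('2016-01-01') TO ('2017-01-01')
        SERVER remote_srv;
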
/*****************************************************************************
@@ -13703,6 +14014,7 @@ unreserved_keyword:
| ASSERTION
| ASSIGNMENT
| AT
+ | ATTACH
| ATTRIBUTE
| BACKWARD
| BEFORE
@@ -13749,6 +14061,7 @@ unreserved_keyword:
| DELIMITER
| DELIMITERS
| DEPENDS
+ | DETACH
| DICTIONARY
| DISABLE_P
| DISCARD
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index 481a4ddc48..92d1577030 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -501,6 +501,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node *expr)
err = _("grouping operations are not allowed in trigger WHEN conditions");
break;
+ case EXPR_KIND_PARTITION_EXPRESSION:
+ if (isAgg)
+ err = _("aggregate functions are not allowed in partition key expression");
+ else
+ err = _("grouping operations are not allowed in partition key expression");
+
+ break;
/*
* There is intentionally no default: case here, so that the
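
EXPR_KIND_PARTITION_EXPRESSION gates what may appear in a partition key
expression: aggregates, grouping operations, window functions, subqueries,
and set-returning functions are all rejected. A sketch of one such failure,
with a hypothetical table:

    CREATE TABLE t (a int) PARTITION BY RANGE (sum(a));
    -- ERROR:  aggregate functions are not allowed in partition key expressions
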
@@ -858,6 +865,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
case EXPR_KIND_TRIGGER_WHEN:
err = _("window functions are not allowed in trigger WHEN conditions");
break;
+ case EXPR_KIND_PARTITION_EXPRESSION:
+ err = _("window functions are not allowed in partition key expression");
+ break;
/*
* There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 17d1cbf8b3..8a2bdf06e8 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1843,6 +1843,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
case EXPR_KIND_TRIGGER_WHEN:
err = _("cannot use subquery in trigger WHEN condition");
break;
+ case EXPR_KIND_PARTITION_EXPRESSION:
+ err = _("cannot use subquery in partition key expression");
+ break;
/*
* There is intentionally no default: case here, so that the
@@ -3446,6 +3449,8 @@ ParseExprKindName(ParseExprKind exprKind)
return "EXECUTE";
case EXPR_KIND_TRIGGER_WHEN:
return "WHEN";
+ case EXPR_KIND_PARTITION_EXPRESSION:
+ return "PARTITION BY";
/*
* There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 56c9a4293d..7d9b4157d4 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2166,6 +2166,9 @@ check_srf_call_placement(ParseState *pstate, int location)
case EXPR_KIND_TRIGGER_WHEN:
err = _("set-returning functions are not allowed in trigger WHEN conditions");
break;
+ case EXPR_KIND_PARTITION_EXPRESSION:
+ err = _("set-returning functions are not allowed in partition key expression");
+ break;
/*
* There is intentionally no default: case here, so that the
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 0670bc2482..cc6a961bb4 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -47,8 +47,10 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
+#include "optimizer/planner.h"
#include "parser/analyze.h"
#include "parser/parse_clause.h"
+#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
@@ -62,6 +64,7 @@
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/ruleutils.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -87,6 +90,8 @@ typedef struct
List *alist; /* "after list" of things to do after creating
* the table */
IndexStmt *pkey; /* PRIMARY KEY index, if any */
+ bool ispartitioned; /* true if table is partitioned */
+ Node *partbound; /* transformed FOR VALUES */
} CreateStmtContext;
/* State shared by transformCreateSchemaStmt and its subroutines */
@@ -129,6 +134,7 @@ static void transformConstraintAttrs(CreateStmtContext *cxt,
List *constraintList);
static void transformColumnType(CreateStmtContext *cxt, ColumnDef *column);
static void setSchemaName(char *context_schema, char **stmt_schema_name);
+static void transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd);
/*
@@ -229,6 +235,7 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString)
cxt.blist = NIL;
cxt.alist = NIL;
cxt.pkey = NULL;
+ cxt.ispartitioned = stmt->partspec != NULL;
/*
* Notice that we allow OIDs here only for plain tables, even though
@@ -247,6 +254,28 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString)
if (stmt->ofTypename)
transformOfType(&cxt, stmt->ofTypename);
+ if (stmt->partspec)
+ {
+ int partnatts = list_length(stmt->partspec->partParams);
+
+ if (stmt->inhRelations && !stmt->partbound)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot create partitioned table as inheritance child")));
+
+ if (partnatts > PARTITION_MAX_KEYS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("cannot partition using more than %d columns",
+ PARTITION_MAX_KEYS)));
+
+		if (pg_strcasecmp(stmt->partspec->strategy, "list") == 0 &&
+ partnatts > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot list partition using more than one column")));
+ }
+
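
These checks bound the size of the partition key and restrict list
partitioning to a single column. Sketches of statements they reject, with
hypothetical tables:

    CREATE TABLE t (a int, b int) PARTITION BY LIST (a, b);
    -- ERROR:  cannot list partition using more than one column

    CREATE TABLE c (a int) INHERITS (p) PARTITION BY RANGE (a);
    -- ERROR:  cannot create partitioned table as inheritance child
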
/*
* Run through each primary element in the table creation clause. Separate
* column defs from constraints, and do preliminary analysis. We have to
@@ -583,6 +612,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
errmsg("primary key constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("primary key constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
/* FALL THRU */
case CONSTR_UNIQUE:
@@ -592,6 +627,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
errmsg("unique constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unique constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
if (constraint->keys == NIL)
constraint->keys = list_make1(makeString(column->colname));
cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
@@ -609,6 +650,12 @@ transformColumnDefinition(CreateStmtContext *cxt, ColumnDef *column)
errmsg("foreign key constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("foreign key constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
/*
* Fill in the current attribute's name and throw it into the
@@ -674,6 +721,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
errmsg("primary key constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("primary key constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
break;
@@ -684,6 +737,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
errmsg("unique constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unique constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
break;
@@ -694,6 +753,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
errmsg("exclusion constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("exclusion constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
cxt->ixconstraints = lappend(cxt->ixconstraints, constraint);
break;
@@ -708,6 +773,12 @@ transformTableConstraint(CreateStmtContext *cxt, Constraint *constraint)
errmsg("foreign key constraints are not supported on foreign tables"),
parser_errposition(cxt->pstate,
constraint->location)));
+ if (cxt->ispartitioned)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("foreign key constraints are not supported on partitioned tables"),
+ parser_errposition(cxt->pstate,
+ constraint->location)));
cxt->fkconstraints = lappend(cxt->fkconstraints, constraint);
break;
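
With cxt->ispartitioned set, the index-backed constraint types are rejected
on partitioned tables. A sketch with a hypothetical table:

    CREATE TABLE t (a int PRIMARY KEY) PARTITION BY RANGE (a);
    -- ERROR:  primary key constraints are not supported on partitioned tables
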
@@ -763,7 +834,8 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla
relation->rd_rel->relkind != RELKIND_VIEW &&
relation->rd_rel->relkind != RELKIND_MATVIEW &&
relation->rd_rel->relkind != RELKIND_COMPOSITE_TYPE &&
- relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ relation->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table, view, materialized view, composite type, or foreign table",
@@ -1854,7 +1926,8 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt)
rel = heap_openrv(inh, AccessShareLock);
/* check user requested inheritance from valid relkind */
if (rel->rd_rel->relkind != RELKIND_RELATION &&
- rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE)
+ rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("inherited relation \"%s\" is not a table or foreign table",
@@ -2512,6 +2585,8 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
cxt.blist = NIL;
cxt.alist = NIL;
cxt.pkey = NULL;
+ cxt.ispartitioned = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+ cxt.partbound = NULL;
/*
* The only subtypes that currently require parse transformation handling
@@ -2594,6 +2669,19 @@ transformAlterTableStmt(Oid relid, AlterTableStmt *stmt,
break;
}
+ case AT_AttachPartition:
+ {
+ PartitionCmd *partcmd = (PartitionCmd *) cmd->def;
+
+ transformAttachPartition(&cxt, partcmd);
+
+ /* assign transformed values */
+ partcmd->bound = cxt.partbound;
+ }
+
+ newcmds = lappend(newcmds, cmd);
+ break;
+
default:
newcmds = lappend(newcmds, cmd);
break;
@@ -2958,3 +3046,237 @@ setSchemaName(char *context_schema, char **stmt_schema_name)
"different from the one being created (%s)",
*stmt_schema_name, context_schema)));
}
+
+/*
+ * transformAttachPartition
+ * Analyze ATTACH PARTITION ... FOR VALUES ...
+ */
+static void
+transformAttachPartition(CreateStmtContext *cxt, PartitionCmd *cmd)
+{
+ Relation parentRel = cxt->rel;
+
+ /*
+ * We are going to try to validate the partition bound specification
+	 * against the partition key of the parent, so it had better have one.
+ */
+ if (parentRel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("\"%s\" is not partitioned",
+ RelationGetRelationName(parentRel))));
+
+	/* transform the values */
+ Assert(RelationGetPartitionKey(parentRel) != NULL);
+ cxt->partbound = transformPartitionBound(cxt->pstate, parentRel,
+ cmd->bound);
+}
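
ATTACH PARTITION is only meaningful against a partitioned parent. A sketch
of the rejected case, with hypothetical tables:

    CREATE TABLE plain (a int);
    ALTER TABLE plain ATTACH PARTITION other FOR VALUES IN (1);
    -- ERROR:  "plain" is not partitioned
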
+
+/*
+ * transformPartitionBound
+ *
+ * Transform partition bound specification
+ */
+Node *
+transformPartitionBound(ParseState *pstate, Relation parent, Node *bound)
+{
+ PartitionBoundSpec *spec = (PartitionBoundSpec *) bound,
+ *result_spec;
+ PartitionKey key = RelationGetPartitionKey(parent);
+ char strategy = get_partition_strategy(key);
+ int partnatts = get_partition_natts(key);
+ List *partexprs = get_partition_exprs(key);
+
+ result_spec = copyObject(spec);
+
+ if (strategy == PARTITION_STRATEGY_LIST)
+ {
+ ListCell *cell;
+ char *colname;
+
+ /* Get the only column's name in case we need to output an error */
+ if (key->partattrs[0] != 0)
+ colname = get_relid_attribute_name(RelationGetRelid(parent),
+ key->partattrs[0]);
+ else
+ colname = deparse_expression((Node *) linitial(partexprs),
+ deparse_context_for(RelationGetRelationName(parent),
+ RelationGetRelid(parent)),
+ false, false);
+
+ if (spec->strategy != PARTITION_STRATEGY_LIST)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("invalid bound specification for a list partition"),
+ parser_errposition(pstate, exprLocation(bound))));
+
+ result_spec->listdatums = NIL;
+ foreach(cell, spec->listdatums)
+ {
+ A_Const *con = (A_Const *) lfirst(cell);
+ Node *value;
+ ListCell *cell2;
+ bool duplicate;
+
+ value = (Node *) make_const(pstate, &con->val, con->location);
+ value = coerce_to_target_type(pstate,
+ value, exprType(value),
+ get_partition_col_typid(key, 0),
+ get_partition_col_typmod(key, 0),
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+
+ if (value == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"",
+ format_type_be(get_partition_col_typid(key, 0)),
+ colname),
+ parser_errposition(pstate,
+ exprLocation((Node *) con))));
+
+ /* Simplify the expression */
+ value = (Node *) expression_planner((Expr *) value);
+
+ /* Don't add to the result if the value is a duplicate */
+ duplicate = false;
+ foreach(cell2, result_spec->listdatums)
+ {
+ Const *value2 = (Const *) lfirst(cell2);
+
+ if (equal(value, value2))
+ {
+ duplicate = true;
+ break;
+ }
+ }
+ if (duplicate)
+ continue;
+
+ result_spec->listdatums = lappend(result_spec->listdatums,
+ value);
+ }
+ }
+ else if (strategy == PARTITION_STRATEGY_RANGE)
+ {
+ ListCell *cell1,
+ *cell2;
+ int i,
+ j;
+ char *colname;
+
+ if (spec->strategy != PARTITION_STRATEGY_RANGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("invalid bound specification for a range partition"),
+ parser_errposition(pstate, exprLocation(bound))));
+
+ Assert(spec->lowerdatums != NIL && spec->upperdatums != NIL);
+
+ if (list_length(spec->lowerdatums) != partnatts)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("FROM must specify exactly one value per partitioning column")));
+ if (list_length(spec->upperdatums) != partnatts)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("TO must specify exactly one value per partitioning column")));
+
+ i = j = 0;
+ result_spec->lowerdatums = result_spec->upperdatums = NIL;
+ forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
+ {
+ PartitionRangeDatum *ldatum,
+ *rdatum;
+ Node *value;
+ A_Const *lcon = NULL,
+ *rcon = NULL;
+
+ ldatum = (PartitionRangeDatum *) lfirst(cell1);
+ rdatum = (PartitionRangeDatum *) lfirst(cell2);
+ /* Get the column's name in case we need to output an error */
+ if (key->partattrs[i] != 0)
+ colname = get_relid_attribute_name(RelationGetRelid(parent),
+ key->partattrs[i]);
+ else
+ {
+ colname = deparse_expression((Node *) list_nth(partexprs, j),
+ deparse_context_for(RelationGetRelationName(parent),
+ RelationGetRelid(parent)),
+ false, false);
+ ++j;
+ }
+
+ if (!ldatum->infinite)
+ lcon = (A_Const *) ldatum->value;
+ if (!rdatum->infinite)
+ rcon = (A_Const *) rdatum->value;
+
+ if (lcon)
+ {
+ value = (Node *) make_const(pstate, &lcon->val, lcon->location);
+ if (((Const *) value)->constisnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot specify NULL in range bound")));
+ value = coerce_to_target_type(pstate,
+ value, exprType(value),
+ get_partition_col_typid(key, i),
+ get_partition_col_typmod(key, i),
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (value == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"",
+ format_type_be(get_partition_col_typid(key, i)),
+ colname),
+ parser_errposition(pstate, exprLocation((Node *) ldatum))));
+
+ /* Simplify the expression */
+ value = (Node *) expression_planner((Expr *) value);
+ ldatum->value = value;
+ }
+
+ if (rcon)
+ {
+ value = (Node *) make_const(pstate, &rcon->val, rcon->location);
+ if (((Const *) value)->constisnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("cannot specify NULL in range bound")));
+ value = coerce_to_target_type(pstate,
+ value, exprType(value),
+ get_partition_col_typid(key, i),
+ get_partition_col_typmod(key, i),
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (value == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("specified value cannot be cast to type \"%s\" of column \"%s\"",
+ format_type_be(get_partition_col_typid(key, i)),
+ colname),
+ parser_errposition(pstate, exprLocation((Node *) rdatum))));
+
+ /* Simplify the expression */
+ value = (Node *) expression_planner((Expr *) value);
+ rdatum->value = value;
+ }
+
+ result_spec->lowerdatums = lappend(result_spec->lowerdatums,
+ copyObject(ldatum));
+ result_spec->upperdatums = lappend(result_spec->upperdatums,
+ copyObject(rdatum));
+
+ ++i;
+ }
+ }
+ else
+ elog(ERROR, "unexpected partition strategy: %d", (int) strategy);
+
+ return (Node *) result_spec;
+}
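
Bound datums are coerced to the key column's type, checked for NULLs in
range bounds, and matched one-to-one against the partition key columns.
Sketches of inputs this function rejects, with hypothetical tables:

    CREATE TABLE t (a int, b int) PARTITION BY RANGE (a, b);

    CREATE TABLE t1 PARTITION OF t FOR VALUES FROM (1) TO (10, 20);
    -- ERROR:  FROM must specify exactly one value per partitioning column

    CREATE TABLE t2 PARTITION OF t FOR VALUES FROM (NULL, 1) TO (10, 20);
    -- ERROR:  cannot specify NULL in range bound
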
diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c
index f82d891c34..32e1328149 100644
--- a/src/backend/rewrite/rewriteDefine.c
+++ b/src/backend/rewrite/rewriteDefine.c
@@ -261,7 +261,8 @@ DefineQueryRewrite(char *rulename,
*/
if (event_relation->rd_rel->relkind != RELKIND_RELATION &&
event_relation->rd_rel->relkind != RELKIND_MATVIEW &&
- event_relation->rd_rel->relkind != RELKIND_VIEW)
+ event_relation->rd_rel->relkind != RELKIND_VIEW &&
+ event_relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table or view",
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 65c3d6e081..bf4f098c15 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -1231,7 +1231,8 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
TargetEntry *tle;
if (target_relation->rd_rel->relkind == RELKIND_RELATION ||
- target_relation->rd_rel->relkind == RELKIND_MATVIEW)
+ target_relation->rd_rel->relkind == RELKIND_MATVIEW ||
+ target_relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
/*
* Emit CTID so that executor can find the row to update or delete.
diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c
index b7edefc7dd..e38586dd80 100644
--- a/src/backend/rewrite/rowsecurity.c
+++ b/src/backend/rewrite/rowsecurity.c
@@ -121,7 +121,8 @@ get_row_security_policies(Query *root, RangeTblEntry *rte, int rt_index,
*hasSubLinks = false;
/* If this is not a normal relation, just return immediately */
- if (rte->relkind != RELKIND_RELATION)
+ if (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_PARTITIONED_TABLE)
return;
/* Switch to checkAsUser if it's set */
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index f50ce408ae..fd4eff4907 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -987,7 +987,8 @@ ProcessUtilitySlow(ParseState *pstate,
/* Create the table itself */
address = DefineRelation((CreateStmt *) stmt,
RELKIND_RELATION,
- InvalidOid, NULL);
+ InvalidOid, NULL,
+ queryString);
EventTriggerCollectSimpleCommand(address,
secondaryObject,
stmt);
@@ -1020,7 +1021,8 @@ ProcessUtilitySlow(ParseState *pstate,
/* Create the table itself */
address = DefineRelation((CreateStmt *) stmt,
RELKIND_FOREIGN_TABLE,
- InvalidOid, NULL);
+ InvalidOid, NULL,
+ queryString);
CreateForeignTable((CreateForeignTableStmt *) stmt,
address.objectId);
EventTriggerCollectSimpleCommand(address,
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index fecee85e5b..4e2ba19d1b 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -33,6 +33,7 @@
#include "catalog/pg_language.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
+#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
@@ -315,6 +316,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
const Oid *excludeOps,
bool attrsOnly, bool showTblSpc,
int prettyFlags, bool missing_ok);
+static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags);
static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
int prettyFlags, bool missing_ok);
static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname,
@@ -1415,6 +1417,163 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
return buf.data;
}
+/*
+ * pg_get_partkeydef
+ *
+ * Returns the partition key specification, ie, the following:
+ *
+ * PARTITION BY { RANGE | LIST } (column opt_collation opt_opclass [, ...])
+ */
+Datum
+pg_get_partkeydef(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+
+ PG_RETURN_TEXT_P(string_to_text(pg_get_partkeydef_worker(relid,
+ PRETTYFLAG_INDENT)));
+}
+
+/*
+ * Internal workhorse to decompile a partition key definition.
+ */
+static char *
+pg_get_partkeydef_worker(Oid relid, int prettyFlags)
+{
+ Form_pg_partitioned_table form;
+ HeapTuple tuple;
+ oidvector *partclass;
+ oidvector *partcollation;
+ List *partexprs;
+ ListCell *partexpr_item;
+ List *context;
+ Datum datum;
+ bool isnull;
+ StringInfoData buf;
+ int keyno;
+ char *str;
+ char *sep;
+
+ tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for partition key of %u", relid);
+
+ form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
+
+ Assert(form->partrelid == relid);
+
+ /* Must get partclass and partcollation the hard way */
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partclass, &isnull);
+ Assert(!isnull);
+ partclass = (oidvector *) DatumGetPointer(datum);
+
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partcollation, &isnull);
+ Assert(!isnull);
+ partcollation = (oidvector *) DatumGetPointer(datum);
+
+ /*
+ * Get the expressions, if any. (NOTE: we do not use the relcache
+ * versions of the expressions, because we want to display non-const-folded
+ * expressions.)
+ */
+ if (!heap_attisnull(tuple, Anum_pg_partitioned_table_partexprs))
+ {
+ Datum exprsDatum;
+ bool isnull;
+ char *exprsString;
+
+ exprsDatum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partexprs, &isnull);
+ Assert(!isnull);
+ exprsString = TextDatumGetCString(exprsDatum);
+ partexprs = (List *) stringToNode(exprsString);
+
+ if (!IsA(partexprs, List))
+ elog(ERROR, "unexpected node type found in partexprs: %d",
+ (int) nodeTag(partexprs));
+
+ pfree(exprsString);
+ }
+ else
+ partexprs = NIL;
+
+ partexpr_item = list_head(partexprs);
+ context = deparse_context_for(get_relation_name(relid), relid);
+
+ initStringInfo(&buf);
+
+ switch (form->partstrat)
+ {
+ case PARTITION_STRATEGY_LIST:
+ appendStringInfo(&buf, "LIST");
+ break;
+ case PARTITION_STRATEGY_RANGE:
+ appendStringInfo(&buf, "RANGE");
+ break;
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) form->partstrat);
+ }
+
+ appendStringInfo(&buf, " (");
+ sep = "";
+ for (keyno = 0; keyno < form->partnatts; keyno++)
+ {
+ AttrNumber attnum = form->partattrs.values[keyno];
+ Oid keycoltype;
+ Oid keycolcollation;
+ Oid partcoll;
+
+ appendStringInfoString(&buf, sep);
+ sep = ", ";
+ if (attnum != 0)
+ {
+ /* Simple attribute reference */
+ char *attname;
+ int32 keycoltypmod;
+
+ attname = get_relid_attribute_name(relid, attnum);
+ appendStringInfoString(&buf, quote_identifier(attname));
+ get_atttypetypmodcoll(relid, attnum,
+ &keycoltype, &keycoltypmod,
+ &keycolcollation);
+ }
+ else
+ {
+ /* Expression */
+ Node *partkey;
+
+ if (partexpr_item == NULL)
+ elog(ERROR, "too few entries in partexprs list");
+ partkey = (Node *) lfirst(partexpr_item);
+ partexpr_item = lnext(partexpr_item);
+ /* Deparse */
+ str = deparse_expression_pretty(partkey, context, false, false,
+ 0, 0);
+
+ appendStringInfoString(&buf, str);
+ keycoltype = exprType(partkey);
+ keycolcollation = exprCollation(partkey);
+ }
+
+ /* Add collation, if not default for column */
+ partcoll = partcollation->values[keyno];
+ if (OidIsValid(partcoll) && partcoll != keycolcollation)
+ appendStringInfo(&buf, " COLLATE %s",
+							 generate_collation_name(partcoll));
+
+ /* Add the operator class name, if not default */
+ get_opclass_name(partclass->values[keyno], keycoltype, &buf);
+ }
+ appendStringInfoChar(&buf, ')');
+
+ /* Clean up */
+ ReleaseSysCache(tuple);
+
+ return buf.data;
+}
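
The new SQL-callable function decompiles the stored key. A sketch, with a
hypothetical table:

    CREATE TABLE measurement (logdate date) PARTITION BY RANGE (logdate);
    SELECT pg_get_partkeydef('measurement'::regclass);
    --  pg_get_partkeydef
    -- -------------------
    --  RANGE (logdate)
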
/*
* pg_get_constraintdef
@@ -8291,6 +8450,88 @@ get_rule_expr(Node *node, deparse_context *context,
}
break;
+ case T_PartitionBoundSpec:
+ {
+ PartitionBoundSpec *spec = (PartitionBoundSpec *) node;
+ ListCell *cell;
+ char *sep;
+
+ switch (spec->strategy)
+ {
+ case PARTITION_STRATEGY_LIST:
+ Assert(spec->listdatums != NIL);
+
+ appendStringInfoString(buf, "FOR VALUES");
+ appendStringInfoString(buf, " IN (");
+ sep = "";
+					foreach(cell, spec->listdatums)
+ {
+ Const *val = lfirst(cell);
+
+ appendStringInfoString(buf, sep);
+ get_const_expr(val, context, -1);
+ sep = ", ";
+ }
+
+ appendStringInfoString(buf, ")");
+ break;
+
+ case PARTITION_STRATEGY_RANGE:
+ Assert(spec->lowerdatums != NIL &&
+ spec->upperdatums != NIL &&
+ list_length(spec->lowerdatums) ==
+ list_length(spec->upperdatums));
+
+ appendStringInfoString(buf, "FOR VALUES");
+ appendStringInfoString(buf, " FROM");
+ appendStringInfoString(buf, " (");
+ sep = "";
+					foreach(cell, spec->lowerdatums)
+ {
+ PartitionRangeDatum *datum = lfirst(cell);
+ Const *val;
+
+ appendStringInfoString(buf, sep);
+ if (datum->infinite)
+ appendStringInfoString(buf, "UNBOUNDED");
+ else
+ {
+ val = (Const *) datum->value;
+ get_const_expr(val, context, -1);
+ }
+ sep = ", ";
+ }
+ appendStringInfoString(buf, ")");
+
+ appendStringInfoString(buf, " TO");
+ appendStringInfoString(buf, " (");
+ sep = "";
+					foreach(cell, spec->upperdatums)
+ {
+ PartitionRangeDatum *datum = lfirst(cell);
+ Const *val;
+
+ appendStringInfoString(buf, sep);
+ if (datum->infinite)
+ appendStringInfoString(buf, "UNBOUNDED");
+ else
+ {
+ val = (Const *) datum->value;
+ get_const_expr(val, context, -1);
+ }
+ sep = ", ";
+ }
+ appendStringInfoString(buf, ")");
+ break;
+
+ default:
+ elog(ERROR, "unrecognized partition strategy: %d",
+ (int) spec->strategy);
+ break;
+ }
+ }
+ break;
+
case T_List:
{
char *sep;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 79e0b1ff48..2a6835991c 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -32,6 +32,7 @@
#include "access/htup_details.h"
#include "access/multixact.h"
+#include "access/nbtree.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/xact.h"
@@ -40,6 +41,7 @@
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
+#include "catalog/partition.h"
#include "catalog/pg_am.h"
#include "catalog/pg_amproc.h"
#include "catalog/pg_attrdef.h"
@@ -49,6 +51,7 @@
#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_shseclabel.h"
@@ -258,6 +261,8 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi
static Relation AllocateRelationDesc(Form_pg_class relp);
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
+static void RelationBuildPartitionKey(Relation relation);
+static PartitionKey copy_partition_key(PartitionKey fromkey);
static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
static void RelationInitPhysicalAddr(Relation relation);
static void load_critical_index(Oid indexoid, Oid heapoid);
@@ -278,6 +283,8 @@ static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numSupport);
static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
static void unlink_initfile(const char *initfilename);
+static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
+ PartitionDesc partdesc2);
/*
@@ -435,6 +442,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
case RELKIND_INDEX:
case RELKIND_VIEW:
case RELKIND_MATVIEW:
+ case RELKIND_PARTITIONED_TABLE:
break;
default:
return;
@@ -795,6 +803,237 @@ RelationBuildRuleLock(Relation relation)
relation->rd_rules = rulelock;
}
+/*
+ * RelationBuildPartitionKey
+ * Build and attach to relcache partition key data of relation
+ *
+ * Partitioning key data is stored in CacheMemoryContext to ensure it survives
+ * as long as the relcache. To avoid leaking memory in that context in case
+ * of an error partway through this function, we build the structure in the
+ * working context (which must be short-lived) and copy the completed
+ * structure into the cache memory.
+ *
+ * Also, since the structure being created here is sufficiently complex, we
+ * make a private child context of CacheMemoryContext for each relation that
+ * has associated partition key information. That means no complicated logic
+ * to free individual elements whenever the relcache entry is flushed - just
+ * delete the context.
+ */
+static void
+RelationBuildPartitionKey(Relation relation)
+{
+ Form_pg_partitioned_table form;
+ HeapTuple tuple;
+ bool isnull;
+ int i;
+ PartitionKey key;
+ AttrNumber *attrs;
+ oidvector *opclass;
+ oidvector *collation;
+ ListCell *partexprs_item;
+ Datum datum;
+ MemoryContext partkeycxt,
+ oldcxt;
+
+ tuple = SearchSysCache1(PARTRELID,
+ ObjectIdGetDatum(RelationGetRelid(relation)));
+
+ /*
+ * The following happens when we have created our pg_class entry but not
+ * the pg_partitioned_table entry yet.
+ */
+ if (!HeapTupleIsValid(tuple))
+ return;
+
+ key = (PartitionKey) palloc0(sizeof(PartitionKeyData));
+
+ /* Fixed-length attributes */
+ form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
+ key->strategy = form->partstrat;
+ key->partnatts = form->partnatts;
+
+ /*
+ * We can rely on the first variable-length attribute being mapped to the
+ * relevant field of the catalog's C struct, because all previous
+ * attributes are non-nullable and fixed-length.
+ */
+ attrs = form->partattrs.values;
+
+ /* But use the hard way to retrieve further variable-length attributes */
+ /* Operator class */
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partclass, &isnull);
+ Assert(!isnull);
+ opclass = (oidvector *) DatumGetPointer(datum);
+
+ /* Collation */
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partcollation, &isnull);
+ Assert(!isnull);
+ collation = (oidvector *) DatumGetPointer(datum);
+
+ /* Expressions */
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partexprs, &isnull);
+ if (!isnull)
+ {
+ char *exprString;
+ Node *expr;
+
+ exprString = TextDatumGetCString(datum);
+ expr = stringToNode(exprString);
+ pfree(exprString);
+
+ /*
+ * Run the expressions through const-simplification since the planner
+ * will be comparing them to similarly-processed qual clause operands,
+ * and may fail to detect valid matches without this step. We don't
+ * need to bother with canonicalize_qual() though, because partition
+ * expressions are not full-fledged qualification clauses.
+ */
+ expr = eval_const_expressions(NULL, (Node *) expr);
+
+ /* May as well fix opfuncids too */
+ fix_opfuncids((Node *) expr);
+ key->partexprs = (List *) expr;
+ }
+
+ key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
+ key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+ key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+ key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
+
+ key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+
+ /* Gather type and collation info as well */
+ key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+ key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
+ key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
+ key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
+ key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
+ key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+
+ /* Copy partattrs and fill other per-attribute info */
+	memcpy(key->partattrs, attrs, key->partnatts * sizeof(AttrNumber));
+ partexprs_item = list_head(key->partexprs);
+ for (i = 0; i < key->partnatts; i++)
+ {
+ AttrNumber attno = key->partattrs[i];
+ HeapTuple opclasstup;
+ Form_pg_opclass opclassform;
+ Oid funcid;
+
+ /* Collect opfamily information */
+ opclasstup = SearchSysCache1(CLAOID,
+ ObjectIdGetDatum(opclass->values[i]));
+ if (!HeapTupleIsValid(opclasstup))
+ elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
+
+ opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
+ key->partopfamily[i] = opclassform->opcfamily;
+ key->partopcintype[i] = opclassform->opcintype;
+
+ /*
+ * A btree support function covers the cases of list and range methods
+ * currently supported.
+ */
+ funcid = get_opfamily_proc(opclassform->opcfamily,
+ opclassform->opcintype,
+ opclassform->opcintype,
+ BTORDER_PROC);
+
+ fmgr_info(funcid, &key->partsupfunc[i]);
+
+ /* Collation */
+ key->partcollation[i] = collation->values[i];
+
+ /* Collect type information */
+ if (attno != 0)
+ {
+ key->parttypid[i] = relation->rd_att->attrs[attno - 1]->atttypid;
+ key->parttypmod[i] = relation->rd_att->attrs[attno - 1]->atttypmod;
+ key->parttypcoll[i] = relation->rd_att->attrs[attno - 1]->attcollation;
+ }
+ else
+ {
+ key->parttypid[i] = exprType(lfirst(partexprs_item));
+ key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
+ key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
+ }
+ get_typlenbyvalalign(key->parttypid[i],
+ &key->parttyplen[i],
+ &key->parttypbyval[i],
+ &key->parttypalign[i]);
+
+ ReleaseSysCache(opclasstup);
+ }
+
+ ReleaseSysCache(tuple);
+
+ /* Success --- now copy to the cache memory */
+ partkeycxt = AllocSetContextCreate(CacheMemoryContext,
+ RelationGetRelationName(relation),
+ ALLOCSET_SMALL_SIZES);
+ relation->rd_partkeycxt = partkeycxt;
+ oldcxt = MemoryContextSwitchTo(relation->rd_partkeycxt);
+ relation->rd_partkey = copy_partition_key(key);
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * copy_partition_key
+ *
+ * The copy is allocated in the current memory context.
+ */
+static PartitionKey
+copy_partition_key(PartitionKey fromkey)
+{
+ PartitionKey newkey;
+ int n;
+
+ newkey = (PartitionKey) palloc(sizeof(PartitionKeyData));
+
+ newkey->strategy = fromkey->strategy;
+ newkey->partnatts = n = fromkey->partnatts;
+
+ newkey->partattrs = (AttrNumber *) palloc(n * sizeof(AttrNumber));
+ memcpy(newkey->partattrs, fromkey->partattrs, n * sizeof(AttrNumber));
+
+ newkey->partexprs = copyObject(fromkey->partexprs);
+
+ newkey->partopfamily = (Oid *) palloc(n * sizeof(Oid));
+ memcpy(newkey->partopfamily, fromkey->partopfamily, n * sizeof(Oid));
+
+ newkey->partopcintype = (Oid *) palloc(n * sizeof(Oid));
+ memcpy(newkey->partopcintype, fromkey->partopcintype, n * sizeof(Oid));
+
+ newkey->partsupfunc = (FmgrInfo *) palloc(n * sizeof(FmgrInfo));
+ memcpy(newkey->partsupfunc, fromkey->partsupfunc, n * sizeof(FmgrInfo));
+
+ newkey->partcollation = (Oid *) palloc(n * sizeof(Oid));
+ memcpy(newkey->partcollation, fromkey->partcollation, n * sizeof(Oid));
+
+ newkey->parttypid = (Oid *) palloc(n * sizeof(Oid));
+ memcpy(newkey->parttypid, fromkey->parttypid, n * sizeof(Oid));
+
+ newkey->parttypmod = (int32 *) palloc(n * sizeof(int32));
+ memcpy(newkey->parttypmod, fromkey->parttypmod, n * sizeof(int32));
+
+ newkey->parttyplen = (int16 *) palloc(n * sizeof(int16));
+ memcpy(newkey->parttyplen, fromkey->parttyplen, n * sizeof(int16));
+
+ newkey->parttypbyval = (bool *) palloc(n * sizeof(bool));
+ memcpy(newkey->parttypbyval, fromkey->parttypbyval, n * sizeof(bool));
+
+ newkey->parttypalign = (char *) palloc(n * sizeof(char));
+ memcpy(newkey->parttypalign, fromkey->parttypalign, n * sizeof(char));
+
+ newkey->parttypcoll = (Oid *) palloc(n * sizeof(Oid));
+ memcpy(newkey->parttypcoll, fromkey->parttypcoll, n * sizeof(Oid));
+
+ return newkey;
+}
+
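Every array member above is copied with the same palloc-and-memcpy idiom. A hypothetical shorthand (the patch deliberately spells each copy out instead) might read:

    /* Hypothetical macro; newkey, fromkey and n are as in copy_partition_key. */
    #define COPY_KEY_ARRAY(type, fld) \
        do { \
            newkey->fld = (type *) palloc(n * sizeof(type)); \
            memcpy(newkey->fld, fromkey->fld, n * sizeof(type)); \
        } while (0)

    /* usage: COPY_KEY_ARRAY(Oid, partopfamily); COPY_KEY_ARRAY(int32, parttypmod); */
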
/*
* equalRuleLocks
*
@@ -922,6 +1161,58 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
return true;
}
+/*
+ * equalPartitionDescs
+ * Compare two partition descriptors for logical equality
+ */
+static bool
+equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1,
+ PartitionDesc partdesc2)
+{
+ int i;
+
+ if (partdesc1 != NULL)
+ {
+ if (partdesc2 == NULL)
+ return false;
+ if (partdesc1->nparts != partdesc2->nparts)
+ return false;
+
+ Assert(key != NULL || partdesc1->nparts == 0);
+
+ /*
+ * Same oids? If the partitioning structure did not change, that is,
+ * no partitions were added to or removed from the relation, the oids
+ * array should still match element-by-element.
+ */
+ for (i = 0; i < partdesc1->nparts; i++)
+ {
+ if (partdesc1->oids[i] != partdesc2->oids[i])
+ return false;
+ }
+
+ /*
+ * Now compare partition bound collections. The logic to iterate over
+ * the collections is private to partition.c.
+ */
+ if (partdesc1->boundinfo != NULL)
+ {
+ if (partdesc2->boundinfo == NULL)
+ return false;
+
+ if (!partition_bounds_equal(key, partdesc1->boundinfo,
+ partdesc2->boundinfo))
+ return false;
+ }
+ else if (partdesc2->boundinfo != NULL)
+ return false;
+ }
+ else if (partdesc2 != NULL)
+ return false;
+
+ return true;
+}
+
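The NULL handling above is symmetric, which gives the function a simple contract; illustratively (these asserts are not in the patch):

    Assert(equalPartitionDescs(key, NULL, NULL));       /* both missing: equal */
    Assert(!equalPartitionDescs(key, partdesc, NULL));  /* one missing: unequal */
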
/*
* RelationBuildDesc
*
@@ -1050,6 +1341,20 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
relation->rd_fkeylist = NIL;
relation->rd_fkeyvalid = false;
+ /* if a partitioned table, initialize key and partition descriptor info */
+ if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ RelationBuildPartitionKey(relation);
+ RelationBuildPartitionDesc(relation);
+ }
+ else
+ {
+ relation->rd_partkeycxt = NULL;
+ relation->rd_partkey = NULL;
+ relation->rd_partdesc = NULL;
+ relation->rd_pdcxt = NULL;
+ }
+
/*
* if it's an index, initialize index-related information
*/
@@ -2042,6 +2347,12 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
MemoryContextDelete(relation->rd_rulescxt);
if (relation->rd_rsdesc)
MemoryContextDelete(relation->rd_rsdesc->rscxt);
+ if (relation->rd_partkeycxt)
+ MemoryContextDelete(relation->rd_partkeycxt);
+ if (relation->rd_pdcxt)
+ MemoryContextDelete(relation->rd_pdcxt);
+ if (relation->rd_partcheck)
+ pfree(relation->rd_partcheck);
if (relation->rd_fdwroutine)
pfree(relation->rd_fdwroutine);
pfree(relation);
@@ -2190,11 +2501,12 @@ RelationClearRelation(Relation relation, bool rebuild)
*
* When rebuilding an open relcache entry, we must preserve ref count,
* rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
- * attempt to preserve the pg_class entry (rd_rel), tupledesc, and
- * rewrite-rule substructures in place, because various places assume
- * that these structures won't move while they are working with an
- * open relcache entry. (Note: the refcount mechanism for tupledescs
- * might someday allow us to remove this hack for the tupledesc.)
+ * attempt to preserve the pg_class entry (rd_rel), tupledesc,
+ * rewrite-rule, partition key, and partition descriptor substructures
+ * in place, because various places assume that these structures won't
+ * move while they are working with an open relcache entry. (Note:
+ * the refcount mechanism for tupledescs might someday allow us to
+ * remove this hack for the tupledesc.)
*
* Note that this process does not touch CurrentResourceOwner; which
* is good because whatever ref counts the entry may have do not
@@ -2205,6 +2517,8 @@ RelationClearRelation(Relation relation, bool rebuild)
bool keep_tupdesc;
bool keep_rules;
bool keep_policies;
+ bool keep_partkey;
+ bool keep_partdesc;
/* Build temporary entry, but don't link it into hashtable */
newrel = RelationBuildDesc(save_relid, false);
@@ -2235,6 +2549,10 @@ RelationClearRelation(Relation relation, bool rebuild)
keep_tupdesc = equalTupleDescs(relation->rd_att, newrel->rd_att);
keep_rules = equalRuleLocks(relation->rd_rules, newrel->rd_rules);
keep_policies = equalRSDesc(relation->rd_rsdesc, newrel->rd_rsdesc);
+ keep_partkey = (relation->rd_partkey != NULL);
+ keep_partdesc = equalPartitionDescs(relation->rd_partkey,
+ relation->rd_partdesc,
+ newrel->rd_partdesc);
/*
* Perform swapping of the relcache entry contents. Within this
@@ -2289,6 +2607,18 @@ RelationClearRelation(Relation relation, bool rebuild)
SWAPFIELD(Oid, rd_toastoid);
/* pgstat_info must be preserved */
SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
+ /* partition key must be preserved, if we have one */
+ if (keep_partkey)
+ {
+ SWAPFIELD(PartitionKey, rd_partkey);
+ SWAPFIELD(MemoryContext, rd_partkeycxt);
+ }
+ /* preserve old partdesc if no logical change */
+ if (keep_partdesc)
+ {
+ SWAPFIELD(PartitionDesc, rd_partdesc);
+ SWAPFIELD(MemoryContext, rd_pdcxt);
+ }
#undef SWAPFIELD
@@ -2983,7 +3313,9 @@ RelationBuildLocalRelation(const char *relname,
/* system relations and non-table objects don't have one */
if (!IsSystemNamespace(relnamespace) &&
- (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW))
+ (relkind == RELKIND_RELATION ||
+ relkind == RELKIND_MATVIEW ||
+ relkind == RELKIND_PARTITIONED_TABLE))
rel->rd_rel->relreplident = REPLICA_IDENTITY_DEFAULT;
else
rel->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
@@ -3514,6 +3846,20 @@ RelationCacheInitializePhase3(void)
restart = true;
}
+ /*
+ * Reload partition key and descriptor for a partitioned table.
+ */
+ if (relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ RelationBuildPartitionKey(relation);
+ Assert(relation->rd_partkey != NULL);
+
+ RelationBuildPartitionDesc(relation);
+ Assert(relation->rd_partdesc != NULL);
+
+ restart = true;
+ }
+
/* Release hold on the relation */
RelationDecrementReferenceCount(relation);
@@ -4267,6 +4613,8 @@ RelationGetIndexExpressions(Relation relation)
*/
result = (List *) eval_const_expressions(NULL, (Node *) result);
+ result = (List *) canonicalize_qual((Expr *) result);
+
/* May as well fix opfuncids too */
fix_opfuncids((Node *) result);
@@ -5035,6 +5383,10 @@ load_relcache_init_file(bool shared)
rel->rd_rulescxt = NULL;
rel->trigdesc = NULL;
rel->rd_rsdesc = NULL;
+ rel->rd_partkeycxt = NULL;
+ rel->rd_partkey = NULL;
+ rel->rd_partdesc = NULL;
+ rel->rd_partcheck = NIL;
rel->rd_indexprs = NIL;
rel->rd_indpred = NIL;
rel->rd_exclops = NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 65ffe84409..a3e0517b94 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -48,6 +48,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
+#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_range.h"
#include "catalog/pg_rewrite.h"
@@ -568,6 +569,17 @@ static const struct cachedesc cacheinfo[] = {
},
8
},
+ {PartitionedRelationId, /* PARTRELID */
+ PartitionedRelidIndexId,
+ 1,
+ {
+ Anum_pg_partitioned_table_partrelid,
+ 0,
+ 0,
+ 0
+ },
+ 32
+ },
{ProcedureRelationId, /* PROCNAMEARGSNSP */
ProcedureNameArgsNspIndexId,
3,
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index 1cbb9874f3..22f1806eca 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -68,6 +68,8 @@ static int numextmembers;
static void flagInhTables(TableInfo *tbinfo, int numTables,
InhInfo *inhinfo, int numInherits);
+static void flagPartitions(TableInfo *tblinfo, int numTables,
+ PartInfo *partinfo, int numPartitions);
static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
static DumpableObject **buildIndexArray(void *objArray, int numObjs,
Size objSize);
@@ -75,6 +77,8 @@ static int DOCatalogIdCompare(const void *p1, const void *p2);
static int ExtensionMemberIdCompare(const void *p1, const void *p2);
static void findParentsByOid(TableInfo *self,
InhInfo *inhinfo, int numInherits);
+static void findPartitionParentByOid(TableInfo *self, PartInfo *partinfo,
+ int numPartitions);
static int strInArray(const char *pattern, char **arr, int arr_size);
@@ -93,8 +97,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
NamespaceInfo *nspinfo;
ExtensionInfo *extinfo;
InhInfo *inhinfo;
+ PartInfo *partinfo;
int numAggregates;
int numInherits;
+ int numPartitions;
int numRules;
int numProcLangs;
int numCasts;
@@ -231,6 +237,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
write_msg(NULL, "reading table inheritance information\n");
inhinfo = getInherits(fout, &numInherits);
+ if (g_verbose)
+ write_msg(NULL, "reading partition information\n");
+ partinfo = getPartitions(fout, &numPartitions);
+
if (g_verbose)
write_msg(NULL, "reading event triggers\n");
getEventTriggers(fout, &numEventTriggers);
@@ -245,6 +255,11 @@ getSchemaData(Archive *fout, int *numTablesPtr)
write_msg(NULL, "finding inheritance relationships\n");
flagInhTables(tblinfo, numTables, inhinfo, numInherits);
+ /* Link tables to partition parents, mark parents as interesting */
+ if (g_verbose)
+ write_msg(NULL, "finding partition relationships\n");
+ flagPartitions(tblinfo, numTables, partinfo, numPartitions);
+
if (g_verbose)
write_msg(NULL, "reading column info for interesting tables\n");
getTableAttrs(fout, tblinfo, numTables);
@@ -273,6 +288,10 @@ getSchemaData(Archive *fout, int *numTablesPtr)
write_msg(NULL, "reading policies\n");
getPolicies(fout, tblinfo, numTables);
+ if (g_verbose)
+ write_msg(NULL, "reading partition key information for interesting tables\n");
+ getTablePartitionKeyInfo(fout, tblinfo, numTables);
+
*numTablesPtr = numTables;
return tblinfo;
}
@@ -319,6 +338,43 @@ flagInhTables(TableInfo *tblinfo, int numTables,
}
}
+/* flagPartitions -
+ * Fill in parent link fields of every target table that is a partition,
+ * and mark parents of partitions as interesting
+ *
+ * modifies tblinfo
+ */
+static void
+flagPartitions(TableInfo *tblinfo, int numTables,
+ PartInfo *partinfo, int numPartitions)
+{
+ int i;
+
+ for (i = 0; i < numTables; i++)
+ {
+ /* Some kinds are never partitions */
+ if (tblinfo[i].relkind == RELKIND_SEQUENCE ||
+ tblinfo[i].relkind == RELKIND_VIEW ||
+ tblinfo[i].relkind == RELKIND_MATVIEW)
+ continue;
+
+ /* Don't bother computing anything for non-target tables, either */
+ if (!tblinfo[i].dobj.dump)
+ continue;
+
+ /* Find the parent TableInfo and save */
+ findPartitionParentByOid(&tblinfo[i], partinfo, numPartitions);
+
+ /* Mark the parent as interesting for getTableAttrs */
+ if (tblinfo[i].partitionOf)
+ {
+ tblinfo[i].partitionOf->interesting = true;
+ addObjectDependency(&tblinfo[i].dobj,
+ tblinfo[i].partitionOf->dobj.dumpId);
+ }
+ }
+}
+
/* flagInhAttrs -
* for each dumpable table in tblinfo, flag its inherited attributes
*
@@ -919,6 +975,40 @@ findParentsByOid(TableInfo *self,
self->parents = NULL;
}
+/*
+ * findPartitionParentByOid
+ * find a partition's parent in tblinfo[]
+ */
+static void
+findPartitionParentByOid(TableInfo *self, PartInfo *partinfo,
+ int numPartitions)
+{
+ Oid oid = self->dobj.catId.oid;
+ int i;
+
+ for (i = 0; i < numPartitions; i++)
+ {
+ if (partinfo[i].partrelid == oid)
+ {
+ TableInfo *parent;
+
+ parent = findTableByOid(partinfo[i].partparent);
+ if (parent == NULL)
+ {
+ write_msg(NULL, "failed sanity check, parent OID %u of table \"%s\" (OID %u) not found\n",
+ partinfo[i].partparent,
+ self->dobj.name,
+ oid);
+ exit_nicely(1);
+ }
+ self->partitionOf = parent;
+
+ /* While we're at it, also save the partdef */
+ self->partitiondef = partinfo[i].partdef;
+ }
+ }
+}
+
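The lookup above is a linear scan per table, which is fine at typical catalog sizes. If it ever became hot, partinfo[] could be sorted by partrelid and probed with bsearch(); a sketch of the comparator such a change would need (hypothetical, not part of this patch):

    static int
    PartInfoOidCompare(const void *a, const void *b)
    {
        Oid aoid = ((const PartInfo *) a)->partrelid;
        Oid boid = ((const PartInfo *) b)->partrelid;

        if (aoid < boid)
            return -1;
        if (aoid > boid)
            return 1;
        return 0;
    }
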
/*
* parseOidArray
* parse a string of numbers delimited by spaces into a character array
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 42873bb32a..b43d152e77 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1239,9 +1239,10 @@ expand_table_name_patterns(Archive *fout,
"SELECT c.oid"
"\nFROM pg_catalog.pg_class c"
"\n LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace"
- "\nWHERE c.relkind in ('%c', '%c', '%c', '%c', '%c')\n",
+ "\nWHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c')\n",
RELKIND_RELATION, RELKIND_SEQUENCE, RELKIND_VIEW,
- RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE);
+ RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE,
+ RELKIND_PARTITIONED_TABLE);
processSQLNamePattern(GetConnection(fout), query, cell->val, true,
false, "n.nspname", "c.relname", NULL,
"pg_catalog.pg_table_is_visible(c.oid)");
@@ -2098,6 +2099,9 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo, bool oids)
/* Skip FOREIGN TABLEs (no data to dump) */
if (tbinfo->relkind == RELKIND_FOREIGN_TABLE)
return;
+ /* Skip partitioned tables (data in partitions) */
+ if (tbinfo->relkind == RELKIND_PARTITIONED_TABLE)
+ return;
/* Don't dump data in unlogged tables, if so requested */
if (tbinfo->relpersistence == RELPERSISTENCE_UNLOGGED &&
@@ -4993,7 +4997,7 @@ getTables(Archive *fout, int *numTables)
"(c.oid = pip.objoid "
"AND pip.classoid = 'pg_class'::regclass "
"AND pip.objsubid = 0) "
- "WHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c') "
+ "WHERE c.relkind in ('%c', '%c', '%c', '%c', '%c', '%c', '%c') "
"ORDER BY c.oid",
acl_subquery->data,
racl_subquery->data,
@@ -5007,7 +5011,8 @@ getTables(Archive *fout, int *numTables)
RELKIND_SEQUENCE,
RELKIND_RELATION, RELKIND_SEQUENCE,
RELKIND_VIEW, RELKIND_COMPOSITE_TYPE,
- RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE);
+ RELKIND_MATVIEW, RELKIND_FOREIGN_TABLE,
+ RELKIND_PARTITIONED_TABLE);
destroyPQExpBuffer(acl_subquery);
destroyPQExpBuffer(racl_subquery);
@@ -5535,7 +5540,9 @@ getTables(Archive *fout, int *numTables)
* We only need to lock the table for certain components; see
* pg_dump.h
*/
- if (tblinfo[i].dobj.dump && tblinfo[i].relkind == RELKIND_RELATION &&
+ if (tblinfo[i].dobj.dump &&
+ (tblinfo[i].relkind == RELKIND_RELATION ||
+ tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE) &&
(tblinfo[i].dobj.dump & DUMP_COMPONENTS_REQUIRING_LOCK))
{
resetPQExpBuffer(query);
@@ -5635,9 +5642,16 @@ getInherits(Archive *fout, int *numInherits)
/* Make sure we are in proper schema */
selectSourceSchema(fout, "pg_catalog");
- /* find all the inheritance information */
-
- appendPQExpBufferStr(query, "SELECT inhrelid, inhparent FROM pg_inherits");
+ /*
+ * Find all the inheritance information, excluding implicit inheritance
+ * via partitioning. We handle that case using getPartitions(), because
+ * we want more information about partitions than just the parent-child
+ * relationship.
+ */
+ appendPQExpBufferStr(query,
+ "SELECT inhrelid, inhparent "
+ "FROM pg_inherits "
+ "WHERE inhparent NOT IN (SELECT oid FROM pg_class WHERE relkind = 'P')");
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -5663,6 +5677,70 @@ getInherits(Archive *fout, int *numInherits)
return inhinfo;
}
+/*
+ * getPartitions
+ * read all the partition inheritance and partition bound information
+ * from the system catalogs and return them in PartInfo structures
+ *
+ * numPartitions is set to the number of pairs read in
+ */
+PartInfo *
+getPartitions(Archive *fout, int *numPartitions)
+{
+ PGresult *res;
+ int ntups;
+ int i;
+ PQExpBuffer query = createPQExpBuffer();
+ PartInfo *partinfo;
+
+ int i_partrelid;
+ int i_partparent;
+ int i_partbound;
+
+ /* Before version 10, there are no partitions */
+ if (fout->remoteVersion < 100000)
+ {
+ *numPartitions = 0;
+ return NULL;
+ }
+
+ /* Make sure we are in proper schema */
+ selectSourceSchema(fout, "pg_catalog");
+
+ /* find the inheritance and partition bound information for partitions */
+
+ appendPQExpBufferStr(query,
+ "SELECT inhrelid as partrelid, inhparent AS partparent,"
+ " pg_get_expr(relpartbound, inhrelid) AS partbound"
+ " FROM pg_class c, pg_inherits"
+ " WHERE c.oid = inhrelid AND c.relispartition");
+
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ ntups = PQntuples(res);
+
+ *numPartitions = ntups;
+
+ partinfo = (PartInfo *) pg_malloc(ntups * sizeof(PartInfo));
+
+ i_partrelid = PQfnumber(res, "partrelid");
+ i_partparent = PQfnumber(res, "partparent");
+ i_partbound = PQfnumber(res, "partbound");
+
+ for (i = 0; i < ntups; i++)
+ {
+ partinfo[i].partrelid = atooid(PQgetvalue(res, i, i_partrelid));
+ partinfo[i].partparent = atooid(PQgetvalue(res, i, i_partparent));
+ partinfo[i].partdef = pg_strdup(PQgetvalue(res, i, i_partbound));
+ }
+
+ PQclear(res);
+
+ destroyPQExpBuffer(query);
+
+ return partinfo;
+}
+
/*
* getIndexes
* get information about every index on a dumpable table
@@ -6933,6 +7011,47 @@ getTransforms(Archive *fout, int *numTransforms)
return transforminfo;
}
+/*
+ * getTablePartitionKeyInfo -
+ * for each interesting partitioned table, read information about its
+ * partition key
+ *
+ * modifies tblinfo
+ */
+void
+getTablePartitionKeyInfo(Archive *fout, TableInfo *tblinfo, int numTables)
+{
+ PQExpBuffer q = createPQExpBuffer();
+ int i,
+ ntups;
+ PGresult *res;
+
+ /* No partitioned tables before 10 */
+ if (fout->remoteVersion < 100000)
+ return;
+
+ for (i = 0; i < numTables; i++)
+ {
+ TableInfo *tbinfo = &(tblinfo[i]);
+
+ /* Only partitioned tables have a partition key */
+ if (tbinfo->relkind != RELKIND_PARTITIONED_TABLE)
+ continue;
+
+ /* Don't bother computing anything for non-target tables, either */
+ if (!tbinfo->dobj.dump)
+ continue;
+
+ resetPQExpBuffer(q);
+ appendPQExpBuffer(q, "SELECT pg_catalog.pg_get_partkeydef('%u'::pg_catalog.oid)",
+ tbinfo->dobj.catId.oid);
+ res = ExecuteSqlQuery(fout, q->data, PGRES_TUPLES_OK);
+ ntups = PQntuples(res);
+ Assert(ntups == 1);
+ tbinfo->partkeydef = pg_strdup(PQgetvalue(res, 0, 0));
+ }
+}
+
/*
* getTableAttrs -
* for each interesting table, read info about its attributes
@@ -14201,6 +14320,17 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
if (tbinfo->reloftype && !dopt->binary_upgrade)
appendPQExpBuffer(q, " OF %s", tbinfo->reloftype);
+ if (tbinfo->partitionOf && !dopt->binary_upgrade)
+ {
+ TableInfo *parentRel = tbinfo->partitionOf;
+
+ appendPQExpBuffer(q, " PARTITION OF ");
+ if (parentRel->dobj.namespace != tbinfo->dobj.namespace)
+ appendPQExpBuffer(q, "%s.",
+ fmtId(parentRel->dobj.namespace->dobj.name));
+ appendPQExpBufferStr(q, fmtId(parentRel->dobj.name));
+ }
+
if (tbinfo->relkind != RELKIND_MATVIEW)
{
/* Dump the attributes */
@@ -14229,8 +14359,11 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
(!tbinfo->inhNotNull[j] ||
dopt->binary_upgrade));
- /* Skip column if fully defined by reloftype */
- if (tbinfo->reloftype &&
+ /*
+ * Skip column if fully defined by reloftype or the
+ * partition parent.
+ */
+ if ((tbinfo->reloftype || tbinfo->partitionOf) &&
!has_default && !has_notnull && !dopt->binary_upgrade)
continue;
@@ -14259,7 +14392,8 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
}
/* Attribute type */
- if (tbinfo->reloftype && !dopt->binary_upgrade)
+ if ((tbinfo->reloftype || tbinfo->partitionOf) &&
+ !dopt->binary_upgrade)
{
appendPQExpBufferStr(q, " WITH OPTIONS");
}
@@ -14317,15 +14451,22 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
if (actual_atts)
appendPQExpBufferStr(q, "\n)");
- else if (!(tbinfo->reloftype && !dopt->binary_upgrade))
+ else if (!((tbinfo->reloftype || tbinfo->partitionOf) &&
+ !dopt->binary_upgrade))
{
/*
* We must have a parenthesized attribute list, even though
- * empty, when not using the OF TYPE syntax.
+ * empty, when not using the OF TYPE or PARTITION OF syntax.
*/
appendPQExpBufferStr(q, " (\n)");
}
+ if (tbinfo->partitiondef && !dopt->binary_upgrade)
+ {
+ appendPQExpBufferStr(q, "\n");
+ appendPQExpBufferStr(q, tbinfo->partitiondef);
+ }
+
if (numParents > 0 && !dopt->binary_upgrade)
{
appendPQExpBufferStr(q, "\nINHERITS (");
@@ -14343,6 +14484,9 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
appendPQExpBufferChar(q, ')');
}
+ if (tbinfo->relkind == RELKIND_PARTITIONED_TABLE)
+ appendPQExpBuffer(q, "\nPARTITION BY %s", tbinfo->partkeydef);
+
if (tbinfo->relkind == RELKIND_FOREIGN_TABLE)
appendPQExpBuffer(q, "\nSERVER %s", fmtId(srvname));
}
@@ -14403,7 +14547,8 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
*/
if (dopt->binary_upgrade &&
(tbinfo->relkind == RELKIND_RELATION ||
- tbinfo->relkind == RELKIND_FOREIGN_TABLE))
+ tbinfo->relkind == RELKIND_FOREIGN_TABLE ||
+ tbinfo->relkind == RELKIND_PARTITIONED_TABLE))
{
for (j = 0; j < tbinfo->numatts; j++)
{
@@ -14421,7 +14566,8 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
appendStringLiteralAH(q, fmtId(tbinfo->dobj.name), fout);
appendPQExpBufferStr(q, "::pg_catalog.regclass;\n");
- if (tbinfo->relkind == RELKIND_RELATION)
+ if (tbinfo->relkind == RELKIND_RELATION ||
+ tbinfo->relkind == RELKIND_PARTITIONED_TABLE)
appendPQExpBuffer(q, "ALTER TABLE ONLY %s ",
fmtId(tbinfo->dobj.name));
else
@@ -14490,6 +14636,15 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
tbinfo->reloftype);
}
+ if (tbinfo->partitionOf)
+ {
+ appendPQExpBufferStr(q, "\n-- For binary upgrade, set up partitions this way.\n");
+ appendPQExpBuffer(q, "ALTER TABLE ONLY %s ATTACH PARTITION %s %s;\n",
+ fmtId(tbinfo->partitionOf->dobj.name),
+ tbinfo->dobj.name,
+ tbinfo->partitiondef);
+ }
+
appendPQExpBufferStr(q, "\n-- For binary upgrade, set heap's relfrozenxid and relminmxid\n");
appendPQExpBuffer(q, "UPDATE pg_catalog.pg_class\n"
"SET relfrozenxid = '%u', relminmxid = '%u'\n"
@@ -14638,6 +14793,7 @@ dumpTableSchema(Archive *fout, TableInfo *tbinfo)
* dump properties we only have ALTER TABLE syntax for
*/
if ((tbinfo->relkind == RELKIND_RELATION ||
+ tbinfo->relkind == RELKIND_PARTITIONED_TABLE ||
tbinfo->relkind == RELKIND_MATVIEW) &&
tbinfo->relreplident != REPLICA_IDENTITY_DEFAULT)
{
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 7df9066cd7..395a9f3288 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -312,6 +312,7 @@ typedef struct _tableInfo
bool *inhNotNull; /* true if NOT NULL is inherited */
struct _attrDefInfo **attrdefs; /* DEFAULT expressions */
struct _constraintInfo *checkexprs; /* CHECK constraints */
+ char *partkeydef; /* partition key definition */
/*
* Stuff computed only for dumpable tables.
@@ -321,6 +322,8 @@ typedef struct _tableInfo
struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */
int numTriggers; /* number of triggers for table */
struct _triggerInfo *triggers; /* array of TriggerInfo structs */
+ struct _tableInfo *partitionOf; /* TableInfo for the partition parent */
+ char *partitiondef; /* partition bound definition */
} TableInfo;
typedef struct _attrDefInfo
@@ -459,6 +462,15 @@ typedef struct _inhInfo
Oid inhparent; /* OID of its parent */
} InhInfo;
+/* PartInfo isn't a DumpableObject, just temporary state */
+typedef struct _partInfo
+{
+ Oid partrelid; /* OID of a partition */
+ Oid partparent; /* OID of its parent */
+ char *partdef; /* partition bound definition */
+} PartInfo;
+
+
typedef struct _prsInfo
{
DumpableObject dobj;
@@ -625,6 +637,7 @@ extern ConvInfo *getConversions(Archive *fout, int *numConversions);
extern TableInfo *getTables(Archive *fout, int *numTables);
extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
extern InhInfo *getInherits(Archive *fout, int *numInherits);
+extern PartInfo *getPartitions(Archive *fout, int *numPartitions);
extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
extern RuleInfo *getRules(Archive *fout, int *numRules);
@@ -649,5 +662,6 @@ extern void processExtensionTables(Archive *fout, ExtensionInfo extinfo[],
int numExtensions);
extern EventTriggerInfo *getEventTriggers(Archive *fout, int *numEventTriggers);
extern void getPolicies(Archive *fout, TableInfo tblinfo[], int numTables);
+extern void getTablePartitionKeyInfo(Archive *fout, TableInfo *tblinfo, int numTables);
#endif /* PG_DUMP_H */
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 931c6887f9..f0d955be4f 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -865,6 +865,7 @@ permissionsList(const char *pattern)
" WHEN 'm' THEN '%s'"
" WHEN 'S' THEN '%s'"
" WHEN 'f' THEN '%s'"
+ " WHEN 'P' THEN '%s'"
" END as \"%s\",\n"
" ",
gettext_noop("Schema"),
@@ -874,6 +875,7 @@ permissionsList(const char *pattern)
gettext_noop("materialized view"),
gettext_noop("sequence"),
gettext_noop("foreign table"),
+ gettext_noop("table"), /* partitioned table */
gettext_noop("Type"));
printACLColumn(&buf, "c.relacl");
@@ -954,7 +956,7 @@ permissionsList(const char *pattern)
appendPQExpBufferStr(&buf, "\nFROM pg_catalog.pg_class c\n"
" LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace\n"
- "WHERE c.relkind IN ('r', 'v', 'm', 'S', 'f')\n");
+ "WHERE c.relkind IN ('r', 'v', 'm', 'S', 'f', 'P')\n");
/*
* Unless a schema pattern is specified, we suppress system and temp
@@ -1600,8 +1602,8 @@ describeOneTableDetails(const char *schemaname,
* types, and foreign tables (c.f. CommentObject() in comment.c).
*/
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' ||
- tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'f' || tableinfo.relkind == 'c')
+ tableinfo.relkind == 'm' || tableinfo.relkind == 'f' ||
+ tableinfo.relkind == 'c' || tableinfo.relkind == 'P')
appendPQExpBufferStr(&buf, ", pg_catalog.col_description(a.attrelid, a.attnum)");
}
@@ -1666,6 +1668,14 @@ describeOneTableDetails(const char *schemaname,
printfPQExpBuffer(&title, _("Foreign table \"%s.%s\""),
schemaname, relationname);
break;
+ case 'P':
+ if (tableinfo.relpersistence == 'u')
+ printfPQExpBuffer(&title, _("Unlogged table \"%s.%s\""),
+ schemaname, relationname);
+ else
+ printfPQExpBuffer(&title, _("Table \"%s.%s\""),
+ schemaname, relationname);
+ break;
default:
/* untranslated unknown relkind */
printfPQExpBuffer(&title, "?%c? \"%s.%s\"",
@@ -1679,8 +1689,8 @@ describeOneTableDetails(const char *schemaname,
cols = 2;
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' ||
- tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'f' || tableinfo.relkind == 'c')
+ tableinfo.relkind == 'm' || tableinfo.relkind == 'f' ||
+ tableinfo.relkind == 'c' || tableinfo.relkind == 'P')
{
headers[cols++] = gettext_noop("Collation");
headers[cols++] = gettext_noop("Nullable");
@@ -1701,12 +1711,12 @@ describeOneTableDetails(const char *schemaname,
{
headers[cols++] = gettext_noop("Storage");
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'f')
+ tableinfo.relkind == 'f' || tableinfo.relkind == 'P')
headers[cols++] = gettext_noop("Stats target");
/* Column comments, if the relkind supports this feature. */
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' ||
- tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'c' || tableinfo.relkind == 'f')
+ tableinfo.relkind == 'm' || tableinfo.relkind == 'c' ||
+ tableinfo.relkind == 'f' || tableinfo.relkind == 'P')
headers[cols++] = gettext_noop("Description");
}
@@ -1782,7 +1792,7 @@ describeOneTableDetails(const char *schemaname,
/* Statistics target, if the relkind supports this feature */
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'f')
+ tableinfo.relkind == 'f' || tableinfo.relkind == 'P')
{
printTableAddCell(&cont, PQgetvalue(res, i, firstvcol + 1),
false, false);
@@ -1790,14 +1800,61 @@ describeOneTableDetails(const char *schemaname,
/* Column comments, if the relkind supports this feature. */
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'v' ||
- tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'c' || tableinfo.relkind == 'f')
+ tableinfo.relkind == 'm' || tableinfo.relkind == 'c' ||
+ tableinfo.relkind == 'f' || tableinfo.relkind == 'P')
printTableAddCell(&cont, PQgetvalue(res, i, firstvcol + 2),
false, false);
}
}
/* Make footers */
+ if (pset.sversion >= 100000)
+ {
+ /* Get the partition information */
+ PGresult *result;
+ char *parent_name;
+ char *partdef;
+
+ printfPQExpBuffer(&buf,
+ "SELECT inhparent::pg_catalog.regclass, pg_get_expr(c.relpartbound, inhrelid)"
+ " FROM pg_catalog.pg_class c"
+ " JOIN pg_catalog.pg_inherits"
+ " ON c.oid = inhrelid"
+ " WHERE c.oid = '%s' AND c.relispartition;", oid);
+ result = PSQLexec(buf.data);
+ if (!result)
+ goto error_return;
+
+ if (PQntuples(result) > 0)
+ {
+ parent_name = PQgetvalue(result, 0, 0);
+ partdef = PQgetvalue(result, 0, 1);
+ printfPQExpBuffer(&tmpbuf, _("Partition of: %s %s"), parent_name,
+ partdef);
+ printTableAddFooter(&cont, tmpbuf.data);
+ PQclear(result);
+ }
+ }
+
+ if (tableinfo.relkind == 'P')
+ {
+ /* Get the partition key information */
+ PGresult *result;
+ char *partkeydef;
+
+ printfPQExpBuffer(&buf,
+ "SELECT pg_catalog.pg_get_partkeydef('%s'::pg_catalog.oid);",
+ oid);
+ result = PSQLexec(buf.data);
+ if (!result || PQntuples(result) != 1)
+ goto error_return;
+
+ partkeydef = PQgetvalue(result, 0, 0);
+ printfPQExpBuffer(&tmpbuf, _("Partition key: %s"), partkeydef);
+ printTableAddFooter(&cont, tmpbuf.data);
+ PQclear(result);
+ }
+
if (tableinfo.relkind == 'i')
{
/* Footer information about an index */
@@ -1936,7 +1993,7 @@ describeOneTableDetails(const char *schemaname,
PQclear(result);
}
else if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'f')
+ tableinfo.relkind == 'f' || tableinfo.relkind == 'P')
{
/* Footer information about a table */
PGresult *result = NULL;
@@ -2513,7 +2570,7 @@ describeOneTableDetails(const char *schemaname,
* Finish printing the footer information about a table.
*/
if (tableinfo.relkind == 'r' || tableinfo.relkind == 'm' ||
- tableinfo.relkind == 'f')
+ tableinfo.relkind == 'f' || tableinfo.relkind == 'P')
{
PGresult *result;
int tuples;
@@ -2558,8 +2615,12 @@ describeOneTableDetails(const char *schemaname,
PQclear(result);
}
- /* print inherited tables */
- printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhparent AND i.inhrelid = '%s' ORDER BY inhseqno;", oid);
+ /* print inherited tables (excluding a partitioned parent, shown above) */
+ printfPQExpBuffer(&buf,
+ "SELECT c.oid::pg_catalog.regclass"
+ " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i"
+ " WHERE c.oid=i.inhparent AND i.inhrelid = '%s'"
+ " AND c.relkind != 'P' ORDER BY inhseqno;", oid);
result = PSQLexec(buf.data);
if (!result)
@@ -2588,9 +2649,23 @@ describeOneTableDetails(const char *schemaname,
PQclear(result);
}
- /* print child tables */
- if (pset.sversion >= 80300)
- printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhrelid AND i.inhparent = '%s' ORDER BY c.oid::pg_catalog.regclass::pg_catalog.text;", oid);
+ /* print child tables (with their partition bounds, if partitions) */
+ if (pset.sversion >= 100000)
+ printfPQExpBuffer(&buf,
+ "SELECT c.oid::pg_catalog.regclass, pg_get_expr(c.relpartbound, c.oid)"
+ " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i"
+ " WHERE c.oid=i.inhrelid AND"
+ " i.inhparent = '%s' AND"
+ " EXISTS (SELECT 1 FROM pg_class c WHERE c.oid = '%s')"
+ " ORDER BY c.oid::pg_catalog.regclass::pg_catalog.text;", oid, oid);
+ else if (pset.sversion >= 80300)
+ printfPQExpBuffer(&buf,
+ "SELECT c.oid::pg_catalog.regclass"
+ " FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i"
+ " WHERE c.oid=i.inhrelid AND"
+ " i.inhparent = '%s' AND"
+ " EXISTS (SELECT 1 FROM pg_class c WHERE c.oid = '%s')"
+ " ORDER BY c.oid::pg_catalog.regclass::pg_catalog.text;", oid, oid);
else
printfPQExpBuffer(&buf, "SELECT c.oid::pg_catalog.regclass FROM pg_catalog.pg_class c, pg_catalog.pg_inherits i WHERE c.oid=i.inhrelid AND i.inhparent = '%s' ORDER BY c.relname;", oid);
@@ -2605,24 +2680,39 @@ describeOneTableDetails(const char *schemaname,
/* print the number of child tables, if any */
if (tuples > 0)
{
- printfPQExpBuffer(&buf, _("Number of child tables: %d (Use \\d+ to list them.)"), tuples);
+ if (tableinfo.relkind != 'P')
+ printfPQExpBuffer(&buf, _("Number of child tables: %d (Use \\d+ to list them.)"), tuples);
+ else
+ printfPQExpBuffer(&buf, _("Number of partitions: %d (Use \\d+ to list them.)"), tuples);
printTableAddFooter(&cont, buf.data);
}
}
else
{
/* display the list of child tables */
- const char *ct = _("Child tables");
+ const char *ct = tableinfo.relkind != 'P' ? _("Child tables") : _("Partitions");
int ctw = pg_wcswidth(ct, strlen(ct), pset.encoding);
for (i = 0; i < tuples; i++)
{
- if (i == 0)
- printfPQExpBuffer(&buf, "%s: %s",
- ct, PQgetvalue(result, i, 0));
+ if (tableinfo.relkind != 'P')
+ {
+ if (i == 0)
+ printfPQExpBuffer(&buf, "%s: %s",
+ ct, PQgetvalue(result, i, 0));
+ else
+ printfPQExpBuffer(&buf, "%*s %s",
+ ctw, "", PQgetvalue(result, i, 0));
+ }
else
- printfPQExpBuffer(&buf, "%*s %s",
- ctw, "", PQgetvalue(result, i, 0));
+ {
+ if (i == 0)
+ printfPQExpBuffer(&buf, "%s: %s %s",
+ ct, PQgetvalue(result, i, 0), PQgetvalue(result, i, 1));
+ else
+ printfPQExpBuffer(&buf, "%*s %s %s",
+ ctw, "", PQgetvalue(result, i, 0), PQgetvalue(result, i, 1));
+ }
if (i < tuples - 1)
appendPQExpBufferChar(&buf, ',');
@@ -2717,7 +2807,7 @@ add_tablespace_footer(printTableContent *const cont, char relkind,
Oid tablespace, const bool newline)
{
/* relkinds for which we support tablespaces */
- if (relkind == 'r' || relkind == 'm' || relkind == 'i')
+ if (relkind == 'r' || relkind == 'm' || relkind == 'i' || relkind == 'P')
{
/*
* We ignore the database default tablespace so that users not using
@@ -3051,6 +3141,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys
" WHEN 'S' THEN '%s'"
" WHEN 's' THEN '%s'"
" WHEN 'f' THEN '%s'"
+ " WHEN 'P' THEN '%s'"
" END as \"%s\",\n"
" pg_catalog.pg_get_userbyid(c.relowner) as \"%s\"",
gettext_noop("Schema"),
@@ -3062,6 +3153,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys
gettext_noop("sequence"),
gettext_noop("special"),
gettext_noop("foreign table"),
+ gettext_noop("table"), /* partitioned table */
gettext_noop("Type"),
gettext_noop("Owner"));
@@ -3100,7 +3192,7 @@ listTables(const char *tabtypes, const char *pattern, bool verbose, bool showSys
appendPQExpBufferStr(&buf, "\nWHERE c.relkind IN (");
if (showTables)
- appendPQExpBufferStr(&buf, "'r',");
+ appendPQExpBufferStr(&buf, "'r', 'P',");
if (showViews)
appendPQExpBufferStr(&buf, "'v',");
if (showMatViews)
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 6b95052a67..cd64c39b7f 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -452,7 +452,7 @@ static const SchemaQuery Query_for_list_of_tables = {
/* catname */
"pg_catalog.pg_class c",
/* selcondition */
- "c.relkind IN ('r')",
+ "c.relkind IN ('r', 'P')",
/* viscondition */
"pg_catalog.pg_table_is_visible(c.oid)",
/* namespace */
@@ -483,7 +483,7 @@ static const SchemaQuery Query_for_list_of_updatables = {
/* catname */
"pg_catalog.pg_class c",
/* selcondition */
- "c.relkind IN ('r', 'f', 'v')",
+ "c.relkind IN ('r', 'f', 'v', 'P')",
/* viscondition */
"pg_catalog.pg_table_is_visible(c.oid)",
/* namespace */
@@ -513,7 +513,7 @@ static const SchemaQuery Query_for_list_of_tsvmf = {
/* catname */
"pg_catalog.pg_class c",
/* selcondition */
- "c.relkind IN ('r', 'S', 'v', 'm', 'f')",
+ "c.relkind IN ('r', 'S', 'v', 'm', 'f', 'P')",
/* viscondition */
"pg_catalog.pg_table_is_visible(c.oid)",
/* namespace */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 6adfab751d..a602ba3861 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201612061
+#define CATALOG_VERSION_NO 201612071
#endif
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index 4d84a6ba08..e8a302f2fd 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -189,7 +189,8 @@ extern void recordDependencyOnExpr(const ObjectAddress *depender,
extern void recordDependencyOnSingleRelExpr(const ObjectAddress *depender,
Node *expr, Oid relId,
DependencyType behavior,
- DependencyType self_behavior);
+ DependencyType self_behavior,
+ bool ignore_self);
extern ObjectClass getObjectClass(const ObjectAddress *object);
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index b80d8d8b21..77dc1983e8 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -134,4 +134,15 @@ extern void CheckAttributeType(const char *attname,
List *containing_rowtypes,
bool allow_system_table_mods);
+/* pg_partitioned_table catalog manipulation functions */
+extern void StorePartitionKey(Relation rel,
+ char strategy,
+ int16 partnatts,
+ AttrNumber *partattrs,
+ List *partexprs,
+ Oid *partopclass,
+ Oid *partcollation);
+extern void RemovePartitionKeyByRelId(Oid relid);
+extern void StorePartitionBound(Relation rel, Node *bound);
+
#endif /* HEAP_H */
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index ca5eb3d417..40f7576b7b 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -319,6 +319,9 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname text_pattern_ops));
#define ReplicationOriginNameIndex 6002
+DECLARE_UNIQUE_INDEX(pg_partitioned_table_partrelid_index, 3351, on pg_partitioned_table using btree(partrelid oid_ops));
+#define PartitionedRelidIndexId 3351
+
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
new file mode 100644
index 0000000000..21effbf87b
--- /dev/null
+++ b/src/include/catalog/partition.h
@@ -0,0 +1,83 @@
+/*-------------------------------------------------------------------------
+ *
+ * partition.h
+ * Header file for structures and utility functions related to
+ * partitioning
+ *
+ * Copyright (c) 2007-2016, PostgreSQL Global Development Group
+ *
+ * src/include/catalog/partition.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PARTITION_H
+#define PARTITION_H
+
+#include "fmgr.h"
+#include "executor/tuptable.h"
+#include "nodes/execnodes.h"
+#include "parser/parse_node.h"
+#include "utils/rel.h"
+
+/*
+ * PartitionBoundInfo encapsulates a set of partition bounds. It is usually
+ * associated with a partitioned table as part of its partition descriptor.
+ *
+ * The internal structure is opaque outside partition.c.
+ */
+typedef struct PartitionBoundInfoData *PartitionBoundInfo;
+
+/*
+ * Information about partitions of a partitioned table.
+ */
+typedef struct PartitionDescData
+{
+ int nparts; /* Number of partitions */
+ Oid *oids; /* OIDs of partitions */
+ PartitionBoundInfo boundinfo; /* collection of partition bounds */
+} PartitionDescData;
+
+typedef struct PartitionDescData *PartitionDesc;
+
+/*-----------------------
+ * PartitionDispatch - information about one partitioned table in a partition
+ * hierarchy required to route a tuple to one of its partitions
+ *
+ * reldesc Relation descriptor of the table
+ * key Partition key information of the table
+ * keystate Execution state required for expressions in the partition key
+ * partdesc Partition descriptor of the table
+ * indexes Array with partdesc->nparts members (for details on what
+ * individual members represent, see how they are set in
+ * RelationGetPartitionDispatchInfo())
+ *-----------------------
+ */
+typedef struct PartitionDispatchData
+{
+ Relation reldesc;
+ PartitionKey key;
+ List *keystate; /* list of ExprState */
+ PartitionDesc partdesc;
+ int *indexes;
+} PartitionDispatchData;
+
+typedef struct PartitionDispatchData *PartitionDispatch;
+
+extern void RelationBuildPartitionDesc(Relation relation);
+extern bool partition_bounds_equal(PartitionKey key,
+ PartitionBoundInfo p1, PartitionBoundInfo p2);
+
+extern void check_new_partition_bound(char *relname, Relation parent, Node *bound);
+extern Oid get_partition_parent(Oid relid);
+extern List *get_qual_from_partbound(Relation rel, Relation parent, Node *bound);
+extern List *RelationGetPartitionQual(Relation rel, bool recurse);
+
+/* For tuple routing */
+extern PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
+ int lockmode, int *num_parted,
+ List **leaf_part_oids);
+extern int get_partition_for_tuple(PartitionDispatch *pd,
+ TupleTableSlot *slot,
+ EState *estate,
+ Oid *failed_at);
+#endif /* PARTITION_H */
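A hedged sketch of how executor code might drive this tuple-routing API; parent, slot and estate are placeholders, and locking and error reporting are elided:

    PartitionDispatch *pd;
    List       *leaf_part_oids = NIL;
    int         num_parted;
    int         leaf_index;
    Oid         failed_at;

    pd = RelationGetPartitionDispatchInfo(parent, RowExclusiveLock,
                                          &num_parted, &leaf_part_oids);
    leaf_index = get_partition_for_tuple(pd, slot, estate, &failed_at);
    if (leaf_index < 0)
        elog(ERROR, "no suitable partition for tuple");  /* failed_at names the level */
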
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index e57b81c417..a61b7a2917 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -70,6 +70,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
* not */
bool relispopulated; /* matview currently holds query results */
char relreplident; /* see REPLICA_IDENTITY_xxx constants */
+ bool relispartition; /* is relation a partition? */
TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
TransactionId relminmxid; /* all multixacts in this rel are >= this.
* this is really a MultiXactId */
@@ -78,6 +79,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
aclitem relacl[1]; /* access permissions */
text reloptions[1]; /* access-method-specific options */
+ pg_node_tree relpartbound; /* partition bound node tree */
#endif
} FormData_pg_class;
@@ -97,7 +99,7 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-#define Natts_pg_class 31
+#define Natts_pg_class 33
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
@@ -125,10 +127,12 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_relforcerowsecurity 25
#define Anum_pg_class_relispopulated 26
#define Anum_pg_class_relreplident 27
-#define Anum_pg_class_relfrozenxid 28
-#define Anum_pg_class_relminmxid 29
-#define Anum_pg_class_relacl 30
-#define Anum_pg_class_reloptions 31
+#define Anum_pg_class_relispartition 28
+#define Anum_pg_class_relfrozenxid 29
+#define Anum_pg_class_relminmxid 30
+#define Anum_pg_class_relacl 31
+#define Anum_pg_class_reloptions 32
+#define Anum_pg_class_relpartbound 33
/* ----------------
* initial contents of pg_class
@@ -143,13 +147,13 @@ typedef FormData_pg_class *Form_pg_class;
* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
* similarly, "1" in relminmxid stands for FirstMultiXactId
*/
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f f t n f 3 1 _null_ _null_ _null_));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f f t n f 3 1 _null_ _null_ _null_));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f f f t n f 3 1 _null_ _null_ _null_));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 33 0 t f f f f f f t n f 3 1 _null_ _null_ _null_));
DESCR("");
@@ -161,6 +165,7 @@ DESCR("");
#define RELKIND_COMPOSITE_TYPE 'c' /* composite type */
#define RELKIND_FOREIGN_TABLE 'f' /* foreign table */
#define RELKIND_MATVIEW 'm' /* materialized view */
+#define RELKIND_PARTITIONED_TABLE 'P' /* partitioned table */
#define RELPERSISTENCE_PERMANENT 'p' /* regular table */
#define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */
diff --git a/src/include/catalog/pg_partitioned_table.h b/src/include/catalog/pg_partitioned_table.h
new file mode 100644
index 0000000000..cec54ae62e
--- /dev/null
+++ b/src/include/catalog/pg_partitioned_table.h
@@ -0,0 +1,76 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_partitioned_table.h
+ * definition of the system "partitioned table" relation
+ * along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ *
+ * $PostgreSQL: pgsql/src/include/catalog/pg_partitioned_table.h $
+ *
+ * NOTES
+ * the genbki.pl script reads this file and generates .bki
+ * information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PARTITIONED_TABLE_H
+#define PG_PARTITIONED_TABLE_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ * pg_partitioned_table definition. cpp turns this into
+ * typedef struct FormData_pg_partitioned_table
+ * ----------------
+ */
+#define PartitionedRelationId 3350
+
+CATALOG(pg_partitioned_table,3350) BKI_WITHOUT_OIDS
+{
+ Oid partrelid; /* partitioned table oid */
+ char partstrat; /* partitioning strategy */
+ int16 partnatts; /* number of partition key columns */
+
+ /*
+ * variable-length fields start here, but we allow direct access to
+ * partattrs via the C struct. That's because the first variable-length
+ * field of a heap tuple can be reliably accessed using its C struct
+ * offset, as previous fields are all non-nullable fixed-length fields.
+ */
+ int2vector partattrs; /* each member of the array is the
+ * attribute number of a partition key
+ * column, or 0 if the column is actually
+ * an expression */
+
+#ifdef CATALOG_VARLEN
+ oidvector partclass; /* operator class to compare keys */
+ oidvector partcollation; /* user-specified collation for keys */
+ pg_node_tree partexprs; /* list of expressions in the partitioning
+ * key; one item for each zero entry in
+ * partattrs[] */
+#endif
+} FormData_pg_partitioned_table;
+
+/* ----------------
+ * Form_pg_partitioned_table corresponds to a pointer to a tuple with
+ * the format of pg_partitioned_table relation.
+ * ----------------
+ */
+typedef FormData_pg_partitioned_table *Form_pg_partitioned_table;
+
+/* ----------------
+ * compiler constants for pg_partitioned_table
+ * ----------------
+ */
+#define Natts_pg_partitioned_table 7
+#define Anum_pg_partitioned_table_partrelid 1
+#define Anum_pg_partitioned_table_partstrat 2
+#define Anum_pg_partitioned_table_partnatts 3
+#define Anum_pg_partitioned_table_partattrs 4
+#define Anum_pg_partitioned_table_partclass 5
+#define Anum_pg_partitioned_table_partcollation 6
+#define Anum_pg_partitioned_table_partexprs 7
+
+#endif /* PG_PARTITIONED_TABLE_H */
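With the PARTRELID syscache added earlier in this patch, backend code can fetch a table's row from this catalog like so (a sketch, assuming relid is a partitioned table's OID):

    HeapTuple   tuple;
    Form_pg_partitioned_table form;

    tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
    if (!HeapTupleIsValid(tuple))
        elog(ERROR, "cache lookup failed for partition key of relation %u", relid);
    form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
    /* form->partstrat is PARTITION_STRATEGY_LIST or PARTITION_STRATEGY_RANGE */
    ReleaseSysCache(tuple);
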
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 047a1ce71c..96e77ec437 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -1979,6 +1979,8 @@ DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f
DESCR("role name by OID (with fallback)");
DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ ));
DESCR("index description");
+DATA(insert OID = 3352 ( pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ ));
+DESCR("partition key description");
DATA(insert OID = 1662 ( pg_get_triggerdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_triggerdef _null_ _null_ _null_ ));
DESCR("trigger description");
DATA(insert OID = 1387 ( pg_get_constraintdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_constraintdef _null_ _null_ _null_ ));
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 2b894ff262..d790fbfee2 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -42,6 +42,8 @@ extern bool CheckIndexCompatible(Oid oldId,
List *attributeList,
List *exclusionOpNames);
extern Oid GetDefaultOpClass(Oid type_id, Oid am_id);
+extern Oid ResolveOpClass(List *opclass, Oid attrType,
+ char *accessMethodName, Oid accessMethodId);
/* commands/functioncmds.c */
extern ObjectAddress CreateFunction(ParseState *pstate, CreateFunctionStmt *stmt);
diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h
index 7a770f4df5..fa48f2e960 100644
--- a/src/include/commands/tablecmds.h
+++ b/src/include/commands/tablecmds.h
@@ -23,7 +23,7 @@
extern ObjectAddress DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
- ObjectAddress *typaddress);
+ ObjectAddress *typaddress, const char *queryString);
extern void RemoveRelations(DropStmt *drop);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 136276be53..b4d09f9564 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -14,6 +14,7 @@
#ifndef EXECUTOR_H
#define EXECUTOR_H
+#include "catalog/partition.h"
#include "executor/execdesc.h"
#include "nodes/parsenodes.h"
@@ -188,6 +189,7 @@ extern void CheckValidResultRel(Relation resultRel, CmdType operation);
extern void InitResultRelInfo(ResultRelInfo *resultRelInfo,
Relation resultRelationDesc,
Index resultRelationIndex,
+ bool load_partition_check,
int instrument_options);
extern ResultRelInfo *ExecGetTriggerResultRel(EState *estate, Oid relid);
extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
@@ -211,6 +213,10 @@ extern void EvalPlanQualSetPlan(EPQState *epqstate,
extern void EvalPlanQualSetTuple(EPQState *epqstate, Index rti,
HeapTuple tuple);
extern HeapTuple EvalPlanQualGetTuple(EPQState *epqstate, Index rti);
+extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
+ PartitionDispatch *pd,
+ TupleTableSlot *slot,
+ EState *estate);
#define EvalPlanQualSetSlot(epqstate, slot) ((epqstate)->origslot = (slot))
extern void EvalPlanQualFetchRowMarks(EPQState *epqstate);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 8004d856cc..1de5c8196d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -16,6 +16,7 @@
#include "access/genam.h"
#include "access/heapam.h"
+#include "access/tupconvert.h"
#include "executor/instrument.h"
#include "lib/pairingheap.h"
#include "nodes/params.h"
@@ -320,6 +321,8 @@ typedef struct JunkFilter
* projectReturning for computing a RETURNING list
* onConflictSetProj for computing ON CONFLICT DO UPDATE SET
* onConflictSetWhere list of ON CONFLICT DO UPDATE exprs (qual)
+ * PartitionCheck partition check expression
+ * PartitionCheckExpr partition check expression state
* ----------------
*/
typedef struct ResultRelInfo
@@ -344,6 +347,8 @@ typedef struct ResultRelInfo
ProjectionInfo *ri_projectReturning;
ProjectionInfo *ri_onConflictSetProj;
List *ri_onConflictSetWhere;
+ List *ri_PartitionCheck;
+ List *ri_PartitionCheckExpr;
} ResultRelInfo;
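ri_PartitionCheck carries the partition constraint as a list of quals; execution state can be built lazily, roughly as follows (a sketch, assuming an EState named estate):

    if (resultRelInfo->ri_PartitionCheck != NIL &&
        resultRelInfo->ri_PartitionCheckExpr == NIL)
        resultRelInfo->ri_PartitionCheckExpr = (List *)
            ExecPrepareExpr((Expr *) resultRelInfo->ri_PartitionCheck, estate);
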
/* ----------------
@@ -1143,6 +1148,15 @@ typedef struct ModifyTableState
* tlist */
TupleTableSlot *mt_conflproj; /* CONFLICT ... SET ... projection
* target */
+ struct PartitionDispatchData **mt_partition_dispatch_info;
+ /* Tuple-routing support info */
+ int mt_num_dispatch; /* Number of entries in the above
+ * array */
+ int mt_num_partitions; /* Number of members in the
+ * following arrays */
+ ResultRelInfo *mt_partitions; /* Per partition result relation */
+ TupleConversionMap **mt_partition_tupconv_maps;
+ /* Per partition tuple conversion map */
} ModifyTableState;
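During INSERT these fields drive tuple routing; a hedged sketch of the flow, with mtstate, slot, tuple and estate as placeholders:

    int     leaf;

    leaf = ExecFindPartition(resultRelInfo,
                             mtstate->mt_partition_dispatch_info,
                             slot, estate);
    resultRelInfo = mtstate->mt_partitions + leaf;
    if (mtstate->mt_partition_tupconv_maps[leaf] != NULL)
        tuple = do_convert_tuple(tuple,
                                 mtstate->mt_partition_tupconv_maps[leaf]);
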
/* ----------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index cb9307cd00..c514d3fc93 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -406,6 +406,7 @@ typedef enum NodeTag
T_AlterPolicyStmt,
T_CreateTransformStmt,
T_CreateAmStmt,
+ T_PartitionCmd,
/*
* TAGS FOR PARSE TREE NODES (parsenodes.h)
@@ -454,6 +455,10 @@ typedef enum NodeTag
T_CommonTableExpr,
T_RoleSpec,
T_TriggerTransition,
+ T_PartitionElem,
+ T_PartitionSpec,
+ T_PartitionBoundSpec,
+ T_PartitionRangeDatum,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index f8003e46f3..6b95c48447 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -699,6 +699,79 @@ typedef struct XmlSerialize
int location; /* token location, or -1 if unknown */
} XmlSerialize;
+/* Partitioning related definitions */
+
+/*
+ * PartitionElem - a column in the partition key
+ */
+typedef struct PartitionElem
+{
+ NodeTag type;
+ char *name; /* name of column to partition on, or NULL */
+ Node *expr; /* expression to partition on, or NULL */
+ List *collation; /* name of collation; NIL = default */
+ List *opclass; /* name of desired opclass; NIL = default */
+ int location; /* token location, or -1 if unknown */
+} PartitionElem;
+
+/*
+ * PartitionSpec - partition key specification
+ */
+typedef struct PartitionSpec
+{
+ NodeTag type;
+ char *strategy; /* partitioning strategy ('list' or 'range') */
+ List *partParams; /* List of PartitionElems */
+ int location; /* token location, or -1 if unknown */
+} PartitionSpec;
+
+#define PARTITION_STRATEGY_LIST 'l'
+#define PARTITION_STRATEGY_RANGE 'r'
+
+/*
+ * PartitionBoundSpec - a partition bound specification
+ */
+typedef struct PartitionBoundSpec
+{
+ NodeTag type;
+
+ char strategy;
+
+ /* List partition values */
+ List *listdatums;
+
+ /*
+ * Range partition lower and upper bounds; each member of the lists
+ * is a PartitionRangeDatum (see below).
+ */
+ List *lowerdatums;
+ List *upperdatums;
+
+ int location;
+} PartitionBoundSpec;
+
+/*
+ * PartitionRangeDatum
+ */
+typedef struct PartitionRangeDatum
+{
+ NodeTag type;
+
+ bool infinite;
+ Node *value;
+
+ int location;
+} PartitionRangeDatum;
+
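A range bound such as FOR VALUES FROM (1) TO (10) gets one PartitionRangeDatum per side; a hedged construction sketch, where lower_const and upper_const stand for already-parsed constant nodes:

    PartitionRangeDatum *lo = makeNode(PartitionRangeDatum);
    PartitionRangeDatum *hi = makeNode(PartitionRangeDatum);
    PartitionBoundSpec *spec = makeNode(PartitionBoundSpec);

    lo->infinite = false;
    lo->value = (Node *) lower_const;   /* constant for 1 */
    hi->infinite = false;
    hi->value = (Node *) upper_const;   /* constant for 10 */

    spec->strategy = PARTITION_STRATEGY_RANGE;
    spec->lowerdatums = list_make1(lo);
    spec->upperdatums = list_make1(hi);
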
+/*
+ * PartitionCmd - ALTER TABLE partition commands
+ */
+typedef struct PartitionCmd
+{
+ NodeTag type;
+ RangeVar *name;
+ Node *bound;
+} PartitionCmd;
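A PartitionCmd rides inside an AlterTableCmd; sketching ALTER TABLE parent ATTACH PARTITION child, reusing the spec built above (AT_AttachPartition is added to AlterTableType later in this patch):

    PartitionCmd *pcmd = makeNode(PartitionCmd);
    AlterTableCmd *atcmd = makeNode(AlterTableCmd);

    pcmd->name = makeRangeVar(NULL, "child", -1);
    pcmd->bound = (Node *) spec;

    atcmd->subtype = AT_AttachPartition;
    atcmd->def = (Node *) pcmd;
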
/****************************************************************************
* Nodes for a Query tree
@@ -1549,7 +1622,9 @@ typedef enum AlterTableType
AT_DisableRowSecurity, /* DISABLE ROW SECURITY */
AT_ForceRowSecurity, /* FORCE ROW SECURITY */
AT_NoForceRowSecurity, /* NO FORCE ROW SECURITY */
- AT_GenericOptions /* OPTIONS (...) */
+ AT_GenericOptions, /* OPTIONS (...) */
+ AT_AttachPartition, /* ATTACH PARTITION */
+ AT_DetachPartition /* DETACH PARTITION */
} AlterTableType;
typedef struct ReplicaIdentityStmt
@@ -1775,6 +1850,8 @@ typedef struct CreateStmt
List *tableElts; /* column definitions (list of ColumnDef) */
List *inhRelations; /* relations to inherit from (list of
* inhRelation) */
+ Node *partbound; /* FOR VALUES clause */
+ PartitionSpec *partspec; /* PARTITION BY clause */
TypeName *ofTypename; /* OF typename */
List *constraints; /* constraints (list of Constraint nodes) */
List *options; /* options from WITH clause */
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 77d873beca..581ff6eedb 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -49,6 +49,7 @@ PG_KEYWORD("assertion", ASSERTION, UNRESERVED_KEYWORD)
PG_KEYWORD("assignment", ASSIGNMENT, UNRESERVED_KEYWORD)
PG_KEYWORD("asymmetric", ASYMMETRIC, RESERVED_KEYWORD)
PG_KEYWORD("at", AT, UNRESERVED_KEYWORD)
+PG_KEYWORD("attach", ATTACH, UNRESERVED_KEYWORD)
PG_KEYWORD("attribute", ATTRIBUTE, UNRESERVED_KEYWORD)
PG_KEYWORD("authorization", AUTHORIZATION, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("backward", BACKWARD, UNRESERVED_KEYWORD)
@@ -127,6 +128,7 @@ PG_KEYWORD("delimiter", DELIMITER, UNRESERVED_KEYWORD)
PG_KEYWORD("delimiters", DELIMITERS, UNRESERVED_KEYWORD)
PG_KEYWORD("depends", DEPENDS, UNRESERVED_KEYWORD)
PG_KEYWORD("desc", DESC, RESERVED_KEYWORD)
+PG_KEYWORD("detach", DETACH, UNRESERVED_KEYWORD)
PG_KEYWORD("dictionary", DICTIONARY, UNRESERVED_KEYWORD)
PG_KEYWORD("disable", DISABLE_P, UNRESERVED_KEYWORD)
PG_KEYWORD("discard", DISCARD, UNRESERVED_KEYWORD)
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 66335863db..bd6dc020b2 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -64,7 +64,8 @@ typedef enum ParseExprKind
EXPR_KIND_ALTER_COL_TRANSFORM, /* transform expr in ALTER COLUMN TYPE */
EXPR_KIND_EXECUTE_PARAMETER, /* parameter value in EXECUTE */
EXPR_KIND_TRIGGER_WHEN, /* WHEN condition in CREATE TRIGGER */
- EXPR_KIND_POLICY /* USING or WITH CHECK expr in policy */
+ EXPR_KIND_POLICY, /* USING or WITH CHECK expr in policy */
+ EXPR_KIND_PARTITION_EXPRESSION /* PARTITION BY expression */
} ParseExprKind;
diff --git a/src/include/parser/parse_utilcmd.h b/src/include/parser/parse_utilcmd.h
index be3b6f70c1..783bb0009f 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -25,5 +25,7 @@ extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
List **actions, Node **whereClause);
extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
+extern Node *transformPartitionBound(ParseState *pstate, Relation parent,
+ Node *bound);
#endif /* PARSE_UTILCMD_H */
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 96885bb990..58b1db9f68 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -45,6 +45,11 @@
*/
#define INDEX_MAX_KEYS 32
+/*
+ * Maximum number of columns in a partition key
+ */
+#define PARTITION_MAX_KEYS 32
+
/*
* Set the upper and lower bounds of sequence values.
*/
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 90f5132b03..7ed162322c 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -728,6 +728,7 @@ extern Datum pg_get_viewdef_wrap(PG_FUNCTION_ARGS);
extern Datum pg_get_viewdef_name(PG_FUNCTION_ARGS);
extern Datum pg_get_viewdef_name_ext(PG_FUNCTION_ARGS);
extern Datum pg_get_indexdef(PG_FUNCTION_ARGS);
+extern Datum pg_get_partkeydef(PG_FUNCTION_ARGS);
extern Datum pg_get_indexdef_ext(PG_FUNCTION_ARGS);
extern Datum pg_get_triggerdef(PG_FUNCTION_ARGS);
extern Datum pg_get_triggerdef_ext(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index fa15f28468..cd7ea1d2dd 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -45,6 +45,35 @@ typedef struct LockInfoData
typedef LockInfoData *LockInfo;
+/*
+ * Information about the partition key of a relation
+ */
+typedef struct PartitionKeyData
+{
+ char strategy; /* partitioning strategy */
+ int16 partnatts; /* number of columns in the partition key */
+ AttrNumber *partattrs; /* attribute numbers of columns in the
+ * partition key */
+ List *partexprs; /* list of expressions in the partitioning
+ * key, or NIL */
+
+ Oid *partopfamily; /* OIDs of operator families */
+ Oid *partopcintype; /* OIDs of opclass declared input data types */
+ FmgrInfo *partsupfunc; /* lookup info for support funcs */
+
+ /* Partitioning collation per attribute */
+ Oid *partcollation;
+
+ /* Type information per attribute */
+ Oid *parttypid;
+ int32 *parttypmod;
+ int16 *parttyplen;
+ bool *parttypbyval;
+ char *parttypalign;
+ Oid *parttypcoll;
+} PartitionKeyData;
+
+typedef struct PartitionKeyData *PartitionKey;
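+
+/*
+ * For illustration: for a table declared PARTITION BY RANGE (a, (b+1)),
+ * partnatts is 2; partattrs[0] is the attribute number of "a" while
+ * partattrs[1] is 0, the expression (b+1) being kept in partexprs; each
+ * of the per-column arrays above has partnatts entries.
+ */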
/*
* Here are the contents of a relation cache entry.
@@ -94,6 +123,12 @@ typedef struct RelationData
List *rd_fkeylist; /* list of ForeignKeyCacheInfo (see below) */
bool rd_fkeyvalid; /* true if list has been computed */
+ MemoryContext rd_partkeycxt; /* private memory cxt for the below */
+ struct PartitionKeyData *rd_partkey; /* partition key, or NULL */
+ MemoryContext rd_pdcxt; /* private context for partdesc */
+ struct PartitionDescData *rd_partdesc; /* partitions, or NULL */
+ List *rd_partcheck; /* partition CHECK quals */
+
/* data managed by RelationGetIndexList: */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Oid rd_oidindex; /* OID of unique index on OID, if any */
@@ -534,6 +569,60 @@ typedef struct ViewOptions
RelationNeedsWAL(relation) && \
!IsCatalogRelation(relation))
+/*
+ * RelationGetPartitionKey
+ * Returns the PartitionKey of a relation
+ */
+#define RelationGetPartitionKey(relation) ((relation)->rd_partkey)
+
+/*
+ * PartitionKey inquiry functions
+ */
+static inline int
+get_partition_strategy(PartitionKey key)
+{
+ return key->strategy;
+}
+
+static inline int
+get_partition_natts(PartitionKey key)
+{
+ return key->partnatts;
+}
+
+static inline List *
+get_partition_exprs(PartitionKey key)
+{
+ return key->partexprs;
+}
+
+/*
+ * PartitionKey inquiry functions - one column
+ */
+static inline int16
+get_partition_col_attnum(PartitionKey key, int col)
+{
+ return key->partattrs[col];
+}
+
+static inline Oid
+get_partition_col_typid(PartitionKey key, int col)
+{
+ return key->parttypid[col];
+}
+
+static inline int32
+get_partition_col_typmod(PartitionKey key, int col)
+{
+ return key->parttypmod[col];
+}
+
+/*
+ * RelationGetPartitionDesc
+ * Returns partition descriptor for a relation.
+ */
+#define RelationGetPartitionDesc(relation) ((relation)->rd_partdesc)
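+
+/*
+ * Usage sketch (assuming "rel" is an open, partitioned Relation whose
+ * partition key has been loaded by the relcache):
+ *
+ *     PartitionKey key = RelationGetPartitionKey(rel);
+ *
+ *     if (get_partition_strategy(key) == PARTITION_STRATEGY_RANGE)
+ *         elog(DEBUG1, "range partitioned on %d column(s)",
+ *              get_partition_natts(key));
+ */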
+
/* routines in utils/cache/relcache.c */
extern void RelationIncrementReferenceCount(Relation rel);
extern void RelationDecrementReferenceCount(Relation rel);
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 256615b671..39fe947d6e 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -72,6 +72,7 @@ enum SysCacheIdentifier
OPEROID,
OPFAMILYAMNAMENSP,
OPFAMILYOID,
+ PARTRELID,
PROCNAMEARGSNSP,
PROCOID,
RANGETYPE,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index cf9f6d3dfa..09cc193f2f 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -2974,3 +2974,346 @@ NOTICE: column "c3" of relation "test_add_column" already exists, skipping
c4 | integer | | |
DROP TABLE test_add_column;
+-- unsupported constraint types for partitioned tables
+CREATE TABLE partitioned (
+ a int,
+ b int
+) PARTITION BY RANGE (a, (a+b+1));
+ALTER TABLE partitioned ADD UNIQUE (a);
+ERROR: unique constraints are not supported on partitioned tables
+LINE 1: ALTER TABLE partitioned ADD UNIQUE (a);
+ ^
+ALTER TABLE partitioned ADD PRIMARY KEY (a);
+ERROR: primary key constraints are not supported on partitioned tables
+LINE 1: ALTER TABLE partitioned ADD PRIMARY KEY (a);
+ ^
+ALTER TABLE partitioned ADD FOREIGN KEY (a) REFERENCES blah;
+ERROR: foreign key constraints are not supported on partitioned tables
+LINE 1: ALTER TABLE partitioned ADD FOREIGN KEY (a) REFERENCES blah;
+ ^
+ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&);
+ERROR: exclusion constraints are not supported on partitioned tables
+LINE 1: ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&);
+ ^
+-- cannot drop column that is part of the partition key
+ALTER TABLE partitioned DROP COLUMN a;
+ERROR: cannot drop column named in partition key
+ALTER TABLE partitioned ALTER COLUMN a TYPE char(5);
+ERROR: cannot alter type of column named in partition key
+ALTER TABLE partitioned DROP COLUMN b;
+ERROR: cannot drop column referenced in partition key expression
+ALTER TABLE partitioned ALTER COLUMN b TYPE char(5);
+ERROR: cannot alter type of column referenced in partition key expression
+-- cannot drop NOT NULL on columns in the range partition key
+ALTER TABLE partitioned ALTER COLUMN a DROP NOT NULL;
+ERROR: column "a" is in range partition key
+-- partitioned table cannot participate in regular inheritance
+CREATE TABLE foo (
+ a int,
+ b int
+);
+ALTER TABLE partitioned INHERIT foo;
+ERROR: cannot change inheritance of partitioned table
+ALTER TABLE foo INHERIT partitioned;
+ERROR: cannot inherit from partitioned table "partitioned"
+-- cannot add NO INHERIT constraint to partitioned tables
+ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT;
+ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned"
+DROP TABLE partitioned, foo;
+--
+-- ATTACH PARTITION
+--
+-- check that target table is partitioned
+CREATE TABLE unparted (
+ a int
+);
+CREATE TABLE fail_part (like unparted);
+ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a');
+ERROR: "unparted" is not partitioned
+DROP TABLE unparted, fail_part;
+-- check that partition bound is compatible
+CREATE TABLE list_parted (
+ a int NOT NULL,
+ b char(2) COLLATE "en_US",
+ CONSTRAINT check_a CHECK (a > 0)
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part (LIKE list_parted);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10);
+ERROR: invalid bound specification for a list partition
+LINE 1: ...list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) T...
+ ^
+DROP TABLE fail_part;
+-- check that the table being attached exists
+ALTER TABLE list_parted ATTACH PARTITION nonexistant FOR VALUES IN (1);
+ERROR: relation "nonexistant" does not exist
+-- check ownership of the source table
+CREATE ROLE regress_test_me;
+CREATE ROLE regress_test_not_me;
+CREATE TABLE not_owned_by_me (LIKE list_parted);
+ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;
+SET SESSION AUTHORIZATION regress_test_me;
+CREATE TABLE owned_by_me (
+ a int
+) PARTITION BY LIST (a);
+ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1);
+ERROR: must be owner of relation not_owned_by_me
+RESET SESSION AUTHORIZATION;
+DROP TABLE owned_by_me, not_owned_by_me;
+DROP ROLE regress_test_not_me;
+DROP ROLE regress_test_me;
+-- check that the table being attached is not part of regular inheritance
+CREATE TABLE parent (LIKE list_parted);
+CREATE TABLE child () INHERITS (parent);
+ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1);
+ERROR: cannot attach inheritance child as partition
+ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1);
+ERROR: cannot attach inheritance parent as partition
+DROP TABLE parent CASCADE;
+NOTICE: drop cascades to table child
+-- check that TEMP-ness of parent and partition is compatible
+CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a);
+CREATE TABLE perm_part (a int);
+ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1);
+ERROR: cannot attach a permanent relation as partition of temporary relation "temp_parted"
+DROP TABLE temp_parted, perm_part;
+-- check that the table being attached is not a typed table
+CREATE TYPE mytype AS (a int);
+CREATE TABLE fail_part OF mytype;
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: cannot attach a typed table as partition
+DROP TYPE mytype CASCADE;
+NOTICE: drop cascades to table fail_part
+-- check existence (or non-existence) of oid column
+ALTER TABLE list_parted SET WITH OIDS;
+CREATE TABLE fail_part (a int);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: cannot attach table "fail_part" without OIDs as partition of table "list_parted" with OIDs
+ALTER TABLE list_parted SET WITHOUT OIDS;
+ALTER TABLE fail_part SET WITH OIDS;
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: cannot attach table "fail_part" with OIDs as partition of table "list_parted" without OIDs
+DROP TABLE fail_part;
+-- check that the table being attached has only columns present in the parent
+CREATE TABLE fail_part (like list_parted, c int);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: table "fail_part" contains column "c" not found in parent "list_parted"
+DETAIL: New partition should contain only the columns present in parent.
+DROP TABLE fail_part;
+-- check that the table being attached has every column of the parent
+CREATE TABLE fail_part (a int NOT NULL);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: child table is missing column "b"
+DROP TABLE fail_part;
+-- check that columns match in type, collation and NOT NULL status
+CREATE TABLE fail_part (
+ b char(3),
+ a int NOT NULL
+);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: child table "fail_part" has different type for column "b"
+ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "en_CA";
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: child table "fail_part" has different collation for column "b"
+DROP TABLE fail_part;
+-- check that the table being attached has all constraints of the parent
+CREATE TABLE fail_part (
+ b char(2) COLLATE "en_US",
+ a int NOT NULL
+);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: child table is missing constraint "check_a"
+-- check that the constraint definition matches the parent's
+ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: child table "fail_part" has different definition for check constraint "check_a"
+DROP TABLE fail_part;
+-- check the attributes and constraints after the partition is attached
+CREATE TABLE part_1 (
+ a int NOT NULL,
+ b char(2) COLLATE "en_US",
+ CONSTRAINT check_a CHECK (a > 0)
+);
+ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1);
+-- attislocal and conislocal are always false for merged attributes and constraints respectively.
+SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0;
+ attislocal | attinhcount
+------------+-------------
+ f | 1
+ f | 1
+(2 rows)
+
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a';
+ conislocal | coninhcount
+------------+-------------
+ f | 1
+(1 row)
+
+-- check that the new partition won't overlap with an existing partition
+CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ERROR: partition "fail_part" would overlap partition "part_1"
+-- check validation when attaching list partitions
+CREATE TABLE list_parted2 (
+ a int,
+ b char
+) PARTITION BY LIST (a);
+-- check that violating rows are correctly reported
+CREATE TABLE part_2 (LIKE list_parted2);
+INSERT INTO part_2 VALUES (3, 'a');
+ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
+ERROR: partition constraint is violated by some row
+-- should be ok after deleting the bad row
+DELETE FROM part_2;
+ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
+-- adding constraints that describe the desired partition constraint
+-- (or more restrictive) will help skip the validation scan
+CREATE TABLE part_3_4 (
+ LIKE list_parted2,
+ CONSTRAINT check_a CHECK (a IN (3))
+);
+-- however, if a list partition does not accept nulls, an explicit
+-- NOT NULL constraint is needed on the partition key column for the
+-- validation scan to be skipped
+ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);
+-- adding a NOT NULL constraint will cause the scan to be skipped
+ALTER TABLE list_parted2 DETACH PARTITION part_3_4;
+ALTER TABLE part_3_4 ALTER a SET NOT NULL;
+ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);
+NOTICE: skipping scan to validate partition constraint
+-- check validation when attaching range partitions
+CREATE TABLE range_parted (
+ a int,
+ b int
+) PARTITION BY RANGE (a, b);
+-- check that violating rows are correctly reported
+CREATE TABLE part1 (
+ a int NOT NULL CHECK (a = 1),
+ b int NOT NULL CHECK (b >= 1 AND b <= 10)
+);
+INSERT INTO part1 VALUES (1, 10);
+-- Remember the TO bound is exclusive
+ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10);
+ERROR: partition constraint is violated by some row
+-- should be ok after deleting the bad row
+DELETE FROM part1;
+ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10);
+-- adding constraints that describe the desired partition constraint
+-- (or more restrictive) will help skip the validation scan
+CREATE TABLE part2 (
+ a int NOT NULL CHECK (a = 1),
+ b int NOT NULL CHECK (b >= 10 AND b < 18)
+);
+ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20);
+NOTICE: skipping scan to validate partition constraint
+-- check that leaf partitions are scanned when attaching a partitioned
+-- table
+CREATE TABLE part_5 (
+ LIKE list_parted2
+) PARTITION BY LIST (b);
+-- check that violating rows are correctly reported
+CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a');
+INSERT INTO part_5_a (a, b) VALUES (6, 'a');
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+ERROR: partition constraint is violated by some row
+-- delete the offending row and also add a constraint to skip the scan
+DELETE FROM part_5_a WHERE a NOT IN (3);
+ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL;
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+NOTICE: skipping scan to validate partition constraint
+-- check that the table being attached is not already a partition
+ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
+ERROR: "part_2" is already a partition
+-- check that circular inheritance is not allowed
+ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b');
+ERROR: circular inheritance not allowed
+DETAIL: "part_5" is already a child of "list_parted2".
+ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0);
+ERROR: circular inheritance not allowed
+DETAIL: "list_parted2" is already a child of "list_parted2".
+--
+-- DETACH PARTITION
+--
+-- check that the partition being detached exists at all
+ALTER TABLE list_parted2 DETACH PARTITION part_4;
+ERROR: relation "part_4" does not exist
+-- check that the partition being detached is actually a partition of the parent
+CREATE TABLE not_a_part (a int);
+ALTER TABLE list_parted2 DETACH PARTITION not_a_part;
+ERROR: relation "not_a_part" is not a partition of relation "list_parted2"
+ALTER TABLE list_parted2 DETACH PARTITION part_1;
+ERROR: relation "part_1" is not a partition of relation "list_parted2"
+-- check that, after being detached, attinhcount/coninhcount are reset to 0 and
+-- attislocal/conislocal are set to true
+ALTER TABLE list_parted2 DETACH PARTITION part_3_4;
+SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0;
+ attinhcount | attislocal
+-------------+------------
+ 0 | t
+ 0 | t
+(2 rows)
+
+SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a';
+ coninhcount | conislocal
+-------------+------------
+ 0 | t
+(1 row)
+
+DROP TABLE part_3_4;
+-- Check ALTER TABLE commands for partitioned tables and partitions
+-- cannot add/drop column to/from *only* the parent
+ALTER TABLE ONLY list_parted2 ADD COLUMN c int;
+ERROR: column must be added to child tables too
+ALTER TABLE ONLY list_parted2 DROP COLUMN b;
+ERROR: column must be dropped from child tables too
+-- cannot add a column to partition or drop an inherited one
+ALTER TABLE part_2 ADD COLUMN c text;
+ERROR: cannot add column to a partition
+ALTER TABLE part_2 DROP COLUMN b;
+ERROR: cannot drop inherited column "b"
+-- likewise, cannot rename or alter the type of an inherited column
+ALTER TABLE part_2 RENAME COLUMN b to c;
+ERROR: cannot rename inherited column "b"
+ALTER TABLE part_2 ALTER COLUMN b TYPE text;
+ERROR: cannot alter inherited column "b"
+-- cannot add NOT NULL or check constraints to *only* the parent (i.e., non-inherited)
+ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL;
+ERROR: constraint must be added to child tables too
+ALTER TABLE ONLY list_parted2 add constraint check_b check (b <> 'zz');
+ERROR: constraint must be added to child tables too
+ALTER TABLE list_parted2 add constraint check_b check (b <> 'zz') NO INHERIT;
+ERROR: cannot add NO INHERIT constraint to partitioned table "list_parted2"
+-- cannot drop inherited NOT NULL or check constraints from partition
+ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0);
+ALTER TABLE part_2 ALTER b DROP NOT NULL;
+ERROR: column "b" is marked NOT NULL in parent table
+ALTER TABLE part_2 DROP CONSTRAINT check_a2;
+ERROR: cannot drop inherited constraint "check_a2" of relation "part_2"
+-- cannot drop NOT NULL or check constraints from *only* the parent
+ALTER TABLE ONLY list_parted2 ALTER a DROP NOT NULL;
+ERROR: constraint must be dropped from child tables too
+ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_a2;
+ERROR: constraint must be dropped from child tables too
+-- check that a partition cannot participate in regular inheritance
+CREATE TABLE inh_test () INHERITS (part_2);
+ERROR: cannot inherit from partition "part_2"
+CREATE TABLE inh_test (LIKE part_2);
+ALTER TABLE inh_test INHERIT part_2;
+ERROR: cannot inherit from a partition
+ALTER TABLE part_2 INHERIT inh_test;
+ERROR: cannot change inheritance of a partition
+-- cannot drop or alter type of partition key columns of lower level
+-- partitioned tables; for example, part_5, which is list_parted2's
+-- partition, is partitioned on b;
+ALTER TABLE list_parted2 DROP COLUMN b;
+ERROR: cannot drop column named in partition key
+ALTER TABLE list_parted2 ALTER COLUMN b TYPE text;
+ERROR: cannot alter type of column named in partition key
+-- cleanup
+DROP TABLE list_parted, list_parted2, range_parted CASCADE;
+NOTICE: drop cascades to 6 other objects
+DETAIL: drop cascades to table part1
+drop cascades to table part2
+drop cascades to table part_2
+drop cascades to table part_5
+drop cascades to table part_5_a
+drop cascades to table part_1
diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out
index 41ceb874e8..b40a18aec2 100644
--- a/src/test/regress/expected/create_table.out
+++ b/src/test/regress/expected/create_table.out
@@ -253,3 +253,416 @@ DROP TABLE as_select1;
-- check that the oid column is added before the primary key is checked
CREATE TABLE oid_pk (f1 INT, PRIMARY KEY(oid)) WITH OIDS;
DROP TABLE oid_pk;
+--
+-- Partitioned tables
+--
+-- cannot combine INHERITS and PARTITION BY (although the grammar allows it)
+CREATE TABLE partitioned (
+ a int
+) INHERITS (some_table) PARTITION BY LIST (a);
+ERROR: cannot create partitioned table as inheritance child
+-- cannot use more than 1 column as partition key for list partitioned table
+CREATE TABLE partitioned (
+ a1 int,
+ a2 int
+) PARTITION BY LIST (a1, a2); -- fail
+ERROR: cannot list partition using more than one column
+-- unsupported constraint type for partitioned tables
+CREATE TABLE partitioned (
+ a int PRIMARY KEY
+) PARTITION BY RANGE (a);
+ERROR: primary key constraints are not supported on partitioned tables
+LINE 2: a int PRIMARY KEY
+ ^
+CREATE TABLE pkrel (
+ a int PRIMARY KEY
+);
+CREATE TABLE partitioned (
+ a int REFERENCES pkrel(a)
+) PARTITION BY RANGE (a);
+ERROR: foreign key constraints are not supported on partitioned tables
+LINE 2: a int REFERENCES pkrel(a)
+ ^
+DROP TABLE pkrel;
+CREATE TABLE partitioned (
+ a int UNIQUE
+) PARTITION BY RANGE (a);
+ERROR: unique constraints are not supported on partitioned tables
+LINE 2: a int UNIQUE
+ ^
+CREATE TABLE partitioned (
+ a int,
+ EXCLUDE USING gist (a WITH &&)
+) PARTITION BY RANGE (a);
+ERROR: exclusion constraints are not supported on partitioned tables
+LINE 3: EXCLUDE USING gist (a WITH &&)
+ ^
+-- prevent column from being used twice in the partition key
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (a, a);
+ERROR: column "a" appears more than once in partition key
+-- prevent using prohibited expressions in the key
+CREATE FUNCTION retset (a int) RETURNS SETOF int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE;
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (retset(a));
+ERROR: set-returning functions are not allowed in partition key expression
+DROP FUNCTION retset(int);
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE ((avg(a)));
+ERROR: aggregate functions are not allowed in partition key expression
+CREATE TABLE partitioned (
+ a int,
+ b int
+) PARTITION BY RANGE ((avg(a) OVER (PARTITION BY b)));
+ERROR: window functions are not allowed in partition key expression
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY LIST ((a LIKE (SELECT 1)));
+ERROR: cannot use subquery in partition key expression
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (('a'));
+ERROR: cannot use constant expression as partition key
+CREATE FUNCTION const_func () RETURNS int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE;
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (const_func());
+ERROR: cannot use constant expression as partition key
+DROP FUNCTION const_func();
+-- only accept "list" and "range" as partitioning strategy
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY HASH (a);
+ERROR: unrecognized partitioning strategy "hash"
+-- specified column must be present in the table
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (b);
+ERROR: column "b" named in partition key does not exist
+-- cannot use system columns in partition key
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (xmin);
+ERROR: cannot use system column "xmin" in partition key
+-- functions in key must be immutable
+CREATE FUNCTION immut_func (a int) RETURNS int AS $$ SELECT a + random()::int; $$ LANGUAGE SQL;
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (immut_func(a));
+ERROR: functions in partition key expression must be marked IMMUTABLE
+DROP FUNCTION immut_func(int);
+-- cannot contain whole-row references
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE ((partitioned));
+ERROR: partition key expressions cannot contain whole-row references
+-- prevent using columns of unsupported types in key (type must have a btree operator class)
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY LIST (a);
+ERROR: data type point has no default btree operator class
+HINT: You must specify a btree operator class or define a default btree operator class for the data type.
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY LIST (a point_ops);
+ERROR: operator class "point_ops" does not exist for access method "btree"
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY RANGE (a);
+ERROR: data type point has no default btree operator class
+HINT: You must specify a btree operator class or define a default btree operator class for the data type.
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY RANGE (a point_ops);
+ERROR: operator class "point_ops" does not exist for access method "btree"
+-- cannot add NO INHERIT constraints to partitioned tables
+CREATE TABLE partitioned (
+ a int,
+ CONSTRAINT check_a CHECK (a > 0) NO INHERIT
+) PARTITION BY RANGE (a);
+ERROR: cannot add NO INHERIT constraint to partitioned table "partitioned"
+-- some checks after successful creation of a partitioned table
+CREATE FUNCTION plusone(a int) RETURNS INT AS $$ SELECT a+1; $$ LANGUAGE SQL;
+CREATE TABLE partitioned (
+ a int,
+ b int,
+ c text,
+ d text
+) PARTITION BY RANGE (a oid_ops, plusone(b), c collate "default", d collate "en_US");
+-- check relkind
+SELECT relkind FROM pg_class WHERE relname = 'partitioned';
+ relkind
+---------
+ P
+(1 row)
+
+-- check that range partition key columns are marked NOT NULL
+SELECT attname, attnotnull FROM pg_attribute WHERE attrelid = 'partitioned'::regclass AND attnum > 0;
+ attname | attnotnull
+---------+------------
+ a | t
+ b | f
+ c | t
+ d | t
+(4 rows)
+
+-- prevent a function referenced in partition key from being dropped
+DROP FUNCTION plusone(int);
+ERROR: cannot drop function plusone(integer) because other objects depend on it
+DETAIL: table partitioned depends on function plusone(integer)
+HINT: Use DROP ... CASCADE to drop the dependent objects too.
+-- partitioned table cannot participate in regular inheritance
+CREATE TABLE partitioned2 (
+ a int
+) PARTITION BY LIST ((a+1));
+CREATE TABLE fail () INHERITS (partitioned2);
+ERROR: cannot inherit from partitioned table "partitioned2"
+-- Partition key in describe output
+\d partitioned
+ Table "public.partitioned"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | not null |
+ b | integer | | |
+ c | text | | not null |
+ d | text | | not null |
+Partition key: RANGE (a oid_ops, plusone(b), c, d COLLATE "en_US")
+
+\d partitioned2
+ Table "public.partitioned2"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | integer | | |
+Partition key: LIST ((a + 1))
+
+DROP TABLE partitioned, partitioned2;
+--
+-- Partitions
+--
+-- check partition bound syntax
+CREATE TABLE list_parted (
+ a int
+) PARTITION BY LIST (a);
+-- the syntax allows only a string literal, a numeric literal, or null to be
+-- specified for a partition bound value
+CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN ('1');
+CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2);
+CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null);
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (int '1');
+ERROR: syntax error at or near "int"
+LINE 1: ... fail_part PARTITION OF list_parted FOR VALUES IN (int '1');
+ ^
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int);
+ERROR: syntax error at or near "::"
+LINE 1: ...fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int);
+ ^
+-- syntax does not allow empty list of values for list partitions
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ();
+ERROR: syntax error at or near ")"
+LINE 1: ...E TABLE fail_part PARTITION OF list_parted FOR VALUES IN ();
+ ^
+-- trying to specify range for list partitioned table
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2);
+ERROR: invalid bound specification for a list partition
+LINE 1: ...BLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) T...
+ ^
+-- specified literal can't be cast to the partition column data type
+CREATE TABLE bools (
+ a bool
+) PARTITION BY LIST (a);
+CREATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1);
+ERROR: specified value cannot be cast to type "boolean" of column "a"
+LINE 1: ...REATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1);
+ ^
+DROP TABLE bools;
+CREATE TABLE range_parted (
+ a date
+) PARTITION BY RANGE (a);
+-- trying to specify list for range partitioned table
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a');
+ERROR: invalid bound specification for a range partition
+LINE 1: ...BLE fail_part PARTITION OF range_parted FOR VALUES IN ('a');
+ ^
+-- each of the start and end bounds must have the same number of values as
+-- there are columns in the partition key
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z');
+ERROR: FROM must specify exactly one value per partitioning column
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a') TO ('z', 1);
+ERROR: TO must specify exactly one value per partitioning column
+-- cannot specify null values in range bounds
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (unbounded);
+ERROR: cannot specify NULL in range bound
+-- check if compatible with the specified parent
+-- cannot create as partition of a non-partitioned table
+CREATE TABLE unparted (
+ a int
+);
+CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a');
+ERROR: "unparted" is not partitioned
+DROP TABLE unparted;
+-- cannot create a permanent rel as partition of a temp rel
+CREATE TEMP TABLE temp_parted (
+ a int
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part PARTITION OF temp_parted FOR VALUES IN ('a');
+ERROR: cannot create a permanent relation as partition of temporary relation "temp_parted"
+DROP TABLE temp_parted;
+-- cannot create a table with oids as partition of table without oids
+CREATE TABLE no_oids_parted (
+ a int
+) PARTITION BY RANGE (a) WITHOUT OIDS;
+CREATE TABLE fail_part PARTITION OF no_oids_parted FOR VALUES FROM (1) TO (10) WITH OIDS;
+ERROR: cannot create table with OIDs as partition of table without OIDs
+DROP TABLE no_oids_parted;
+-- likewise, the reverse is also true
+CREATE TABLE oids_parted (
+ a int
+) PARTITION BY RANGE (a) WITH OIDS;
+CREATE TABLE fail_part PARTITION OF oids_parted FOR VALUES FROM (1) TO (10) WITHOUT OIDS;
+ERROR: cannot create table without OIDs as partition of table with OIDs
+DROP TABLE oids_parted;
+-- check for partition bound overlap and other invalid specifications
+CREATE TABLE list_parted2 (
+ a varchar
+) PARTITION BY LIST (a);
+CREATE TABLE part_null_z PARTITION OF list_parted2 FOR VALUES IN (null, 'z');
+CREATE TABLE part_ab PARTITION OF list_parted2 FOR VALUES IN ('a', 'b');
+CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN (null);
+ERROR: partition "fail_part" would overlap partition "part_null_z"
+CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN ('b', 'c');
+ERROR: partition "fail_part" would overlap partition "part_ab"
+CREATE TABLE range_parted2 (
+ a int
+) PARTITION BY RANGE (a);
+-- trying to create range partition with empty range
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (0);
+ERROR: cannot create range partition with empty range
+-- note that the range '[1, 1)' has no elements
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (1);
+ERROR: cannot create range partition with empty range
+CREATE TABLE part0 PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (1);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (2);
+ERROR: partition "fail_part" would overlap partition "part0"
+CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (unbounded);
+ERROR: partition "fail_part" would overlap partition "part1"
+-- now check for multi-column range partition key
+CREATE TABLE range_parted3 (
+ a int,
+ b int
+) PARTITION BY RANGE (a, (b+1));
+CREATE TABLE part00 PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, unbounded);
+CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, 1);
+ERROR: partition "fail_part" would overlap partition "part00"
+CREATE TABLE part10 PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, 1);
+CREATE TABLE part11 PARTITION OF range_parted3 FOR VALUES FROM (1, 1) TO (1, 10);
+CREATE TABLE part12 PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, unbounded);
+CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, 20);
+ERROR: partition "fail_part" would overlap partition "part12"
+-- cannot create a partition that says column b is allowed to range
+-- from -infinity to +infinity, while there exist partitions that have
+-- more specific ranges
+CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, unbounded);
+ERROR: partition "fail_part" would overlap partition "part10"
+-- check schema propagation from parent
+CREATE TABLE parted (
+ a text,
+ b int NOT NULL DEFAULT 0,
+ CONSTRAINT check_a CHECK (length(a) > 0)
+) PARTITION BY LIST (a);
+CREATE TABLE part_a PARTITION OF parted FOR VALUES IN ('a');
+-- only inherited attributes (never local ones)
+SELECT attname, attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_a'::regclass and attnum > 0;
+ attname | attislocal | attinhcount
+---------+------------+-------------
+ a | f | 1
+ b | f | 1
+(2 rows)
+
+-- able to specify column default, column constraint, and table constraint
+CREATE TABLE part_b PARTITION OF parted (
+ b NOT NULL DEFAULT 1 CHECK (b >= 0),
+ CONSTRAINT check_a CHECK (length(a) > 0)
+) FOR VALUES IN ('b');
+NOTICE: merging constraint "check_a" with inherited definition
+-- conislocal should be false for any merged constraints
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass AND conname = 'check_a';
+ conislocal | coninhcount
+------------+-------------
+ f | 1
+(1 row)
+
+-- specify PARTITION BY for a partition
+CREATE TABLE fail_part_col_not_found PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE (c);
+ERROR: column "c" named in partition key does not exist
+CREATE TABLE part_c PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE ((b));
+-- create a level-2 partition
+CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10);
+-- Partition bound in describe output
+\d part_b
+ Table "public.part_b"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | text | | |
+ b | integer | | not null | 1
+Partition of: parted FOR VALUES IN ('b')
+Check constraints:
+ "check_a" CHECK (length(a) > 0)
+ "part_b_b_check" CHECK (b >= 0)
+
+-- Both partition bound and partition key in describe output
+\d part_c
+ Table "public.part_c"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | text | | |
+ b | integer | | not null | 0
+Partition of: parted FOR VALUES IN ('c')
+Partition key: RANGE (b)
+Check constraints:
+ "check_a" CHECK (length(a) > 0)
+Number of partitions: 1 (Use \d+ to list them.)
+
+-- Show partition count in the parent's describe output
+-- Tempted to include \d+ output listing partitions with bound info, but the
+-- output could vary depending on the order in which partition OIDs are
+-- returned.
+\d parted
+ Table "public.parted"
+ Column | Type | Collation | Nullable | Default
+--------+---------+-----------+----------+---------
+ a | text | | |
+ b | integer | | not null | 0
+Partition key: LIST (a)
+Check constraints:
+ "check_a" CHECK (length(a) > 0)
+Number of partitions: 3 (Use \d+ to list them.)
+
+-- partitions can be dropped directly
+DROP TABLE part_a;
+-- need to specify CASCADE to drop partitions along with the parent
+DROP TABLE parted;
+ERROR: cannot drop table parted because other objects depend on it
+DETAIL: table part_b depends on table parted
+table part_c depends on table parted
+table part_c_1_10 depends on table part_c
+HINT: Use DROP ... CASCADE to drop the dependent objects too.
+DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3 CASCADE;
+NOTICE: drop cascades to 14 other objects
+DETAIL: drop cascades to table part00
+drop cascades to table part10
+drop cascades to table part11
+drop cascades to table part12
+drop cascades to table part0
+drop cascades to table part1
+drop cascades to table part_null_z
+drop cascades to table part_ab
+drop cascades to table part_1
+drop cascades to table part_2
+drop cascades to table part_null
+drop cascades to table part_b
+drop cascades to table part_c
+drop cascades to table part_c_1_10
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index b331828e5d..38ea8e86f3 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1542,3 +1542,275 @@ FROM generate_series(1, 3) g(i);
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
+--
+-- Check that constraint exclusion works correctly with partitions using
+-- implicit constraints generated from the partition bound information.
+--
+create table list_parted (
+ a varchar
+) partition by list (a);
+create table part_ab_cd partition of list_parted for values in ('ab', 'cd');
+create table part_ef_gh partition of list_parted for values in ('ef', 'gh');
+create table part_null_xy partition of list_parted for values in (null, 'xy');
+explain (costs off) select * from list_parted;
+ QUERY PLAN
+--------------------------------
+ Append
+ -> Seq Scan on list_parted
+ -> Seq Scan on part_ab_cd
+ -> Seq Scan on part_ef_gh
+ -> Seq Scan on part_null_xy
+(5 rows)
+
+explain (costs off) select * from list_parted where a is null;
+ QUERY PLAN
+--------------------------------
+ Append
+ -> Seq Scan on list_parted
+ Filter: (a IS NULL)
+ -> Seq Scan on part_null_xy
+ Filter: (a IS NULL)
+(5 rows)
+
+explain (costs off) select * from list_parted where a is not null;
+ QUERY PLAN
+---------------------------------
+ Append
+ -> Seq Scan on list_parted
+ Filter: (a IS NOT NULL)
+ -> Seq Scan on part_ab_cd
+ Filter: (a IS NOT NULL)
+ -> Seq Scan on part_ef_gh
+ Filter: (a IS NOT NULL)
+ -> Seq Scan on part_null_xy
+ Filter: (a IS NOT NULL)
+(9 rows)
+
+explain (costs off) select * from list_parted where a in ('ab', 'cd', 'ef');
+ QUERY PLAN
+----------------------------------------------------------
+ Append
+ -> Seq Scan on list_parted
+ Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[]))
+ -> Seq Scan on part_ab_cd
+ Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[]))
+ -> Seq Scan on part_ef_gh
+ Filter: ((a)::text = ANY ('{ab,cd,ef}'::text[]))
+(7 rows)
+
+explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd');
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Append
+ -> Seq Scan on list_parted
+ Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
+ -> Seq Scan on part_ab_cd
+ Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
+ -> Seq Scan on part_ef_gh
+ Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
+ -> Seq Scan on part_null_xy
+ Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[])))
+(9 rows)
+
+explain (costs off) select * from list_parted where a = 'ab';
+ QUERY PLAN
+------------------------------------------
+ Append
+ -> Seq Scan on list_parted
+ Filter: ((a)::text = 'ab'::text)
+ -> Seq Scan on part_ab_cd
+ Filter: ((a)::text = 'ab'::text)
+(5 rows)
+
+create table range_list_parted (
+ a int,
+ b char(2)
+) partition by range (a);
+create table part_1_10 partition of range_list_parted for values from (1) to (10) partition by list (b);
+create table part_1_10_ab partition of part_1_10 for values in ('ab');
+create table part_1_10_cd partition of part_1_10 for values in ('cd');
+create table part_10_20 partition of range_list_parted for values from (10) to (20) partition by list (b);
+create table part_10_20_ab partition of part_10_20 for values in ('ab');
+create table part_10_20_cd partition of part_10_20 for values in ('cd');
+create table part_21_30 partition of range_list_parted for values from (21) to (30) partition by list (b);
+create table part_21_30_ab partition of part_21_30 for values in ('ab');
+create table part_21_30_cd partition of part_21_30 for values in ('cd');
+create table part_40_inf partition of range_list_parted for values from (40) to (unbounded) partition by list (b);
+create table part_40_inf_ab partition of part_40_inf for values in ('ab');
+create table part_40_inf_cd partition of part_40_inf for values in ('cd');
+create table part_40_inf_null partition of part_40_inf for values in (null);
+explain (costs off) select * from range_list_parted;
+ QUERY PLAN
+-------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ -> Seq Scan on part_1_10
+ -> Seq Scan on part_10_20
+ -> Seq Scan on part_21_30
+ -> Seq Scan on part_40_inf
+ -> Seq Scan on part_1_10_ab
+ -> Seq Scan on part_1_10_cd
+ -> Seq Scan on part_10_20_ab
+ -> Seq Scan on part_10_20_cd
+ -> Seq Scan on part_21_30_ab
+ -> Seq Scan on part_21_30_cd
+ -> Seq Scan on part_40_inf_ab
+ -> Seq Scan on part_40_inf_cd
+ -> Seq Scan on part_40_inf_null
+(15 rows)
+
+explain (costs off) select * from range_list_parted where a = 5;
+ QUERY PLAN
+-------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ Filter: (a = 5)
+ -> Seq Scan on part_1_10
+ Filter: (a = 5)
+ -> Seq Scan on part_1_10_ab
+ Filter: (a = 5)
+ -> Seq Scan on part_1_10_cd
+ Filter: (a = 5)
+(9 rows)
+
+explain (costs off) select * from range_list_parted where b = 'ab';
+ QUERY PLAN
+-------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_1_10
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_10_20
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_21_30
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_40_inf
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_1_10_ab
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_10_20_ab
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_21_30_ab
+ Filter: (b = 'ab'::bpchar)
+ -> Seq Scan on part_40_inf_ab
+ Filter: (b = 'ab'::bpchar)
+(19 rows)
+
+explain (costs off) select * from range_list_parted where a between 3 and 23 and b in ('ab');
+ QUERY PLAN
+-----------------------------------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+ -> Seq Scan on part_1_10
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+ -> Seq Scan on part_10_20
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+ -> Seq Scan on part_21_30
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+ -> Seq Scan on part_1_10_ab
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+ -> Seq Scan on part_10_20_ab
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+ -> Seq Scan on part_21_30_ab
+ Filter: ((a >= 3) AND (a <= 23) AND (b = 'ab'::bpchar))
+(15 rows)
+
+/* Should select no rows because range partition key cannot be null */
+explain (costs off) select * from range_list_parted where a is null;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+/* Should only select rows from the null-accepting partition */
+explain (costs off) select * from range_list_parted where b is null;
+ QUERY PLAN
+-------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ Filter: (b IS NULL)
+ -> Seq Scan on part_1_10
+ Filter: (b IS NULL)
+ -> Seq Scan on part_10_20
+ Filter: (b IS NULL)
+ -> Seq Scan on part_21_30
+ Filter: (b IS NULL)
+ -> Seq Scan on part_40_inf
+ Filter: (b IS NULL)
+ -> Seq Scan on part_40_inf_null
+ Filter: (b IS NULL)
+(13 rows)
+
+explain (costs off) select * from range_list_parted where a is not null and a < 67;
+ QUERY PLAN
+------------------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_1_10
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_10_20
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_21_30
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_40_inf
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_1_10_ab
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_1_10_cd
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_10_20_ab
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_10_20_cd
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_21_30_ab
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_21_30_cd
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_40_inf_ab
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_40_inf_cd
+ Filter: ((a IS NOT NULL) AND (a < 67))
+ -> Seq Scan on part_40_inf_null
+ Filter: ((a IS NOT NULL) AND (a < 67))
+(29 rows)
+
+explain (costs off) select * from range_list_parted where a >= 30;
+ QUERY PLAN
+-------------------------------------
+ Append
+ -> Seq Scan on range_list_parted
+ Filter: (a >= 30)
+ -> Seq Scan on part_40_inf
+ Filter: (a >= 30)
+ -> Seq Scan on part_40_inf_ab
+ Filter: (a >= 30)
+ -> Seq Scan on part_40_inf_cd
+ Filter: (a >= 30)
+ -> Seq Scan on part_40_inf_null
+ Filter: (a >= 30)
+(11 rows)
+
+drop table list_parted cascade;
+NOTICE: drop cascades to 3 other objects
+DETAIL: drop cascades to table part_ab_cd
+drop cascades to table part_ef_gh
+drop cascades to table part_null_xy
+drop table range_list_parted cascade;
+NOTICE: drop cascades to 13 other objects
+DETAIL: drop cascades to table part_1_10
+drop cascades to table part_1_10_ab
+drop cascades to table part_1_10_cd
+drop cascades to table part_10_20
+drop cascades to table part_10_20_ab
+drop cascades to table part_10_20_cd
+drop cascades to table part_21_30
+drop cascades to table part_21_30_ab
+drop cascades to table part_21_30_cd
+drop cascades to table part_40_inf
+drop cascades to table part_40_inf_ab
+drop cascades to table part_40_inf_cd
+drop cascades to table part_40_inf_null
diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out
index 03619d71c3..561cefa3c4 100644
--- a/src/test/regress/expected/insert.out
+++ b/src/test/regress/expected/insert.out
@@ -160,3 +160,143 @@ Rules:
drop table inserttest2;
drop table inserttest;
drop type insert_test_type;
+-- direct partition inserts should check the partition bound constraint
+create table range_parted (
+ a text,
+ b int
+) partition by range (a, (b+0));
+create table part1 partition of range_parted for values from ('a', 1) to ('a', 10);
+create table part2 partition of range_parted for values from ('a', 10) to ('a', 20);
+create table part3 partition of range_parted for values from ('b', 1) to ('b', 10);
+create table part4 partition of range_parted for values from ('b', 10) to ('b', 20);
+-- fail
+insert into part1 values ('a', 11);
+ERROR: new row for relation "part1" violates partition constraint
+DETAIL: Failing row contains (a, 11).
+insert into part1 values ('b', 1);
+ERROR: new row for relation "part1" violates partition constraint
+DETAIL: Failing row contains (b, 1).
+-- ok
+insert into part1 values ('a', 1);
+-- fail
+insert into part4 values ('b', 21);
+ERROR: new row for relation "part4" violates partition constraint
+DETAIL: Failing row contains (b, 21).
+insert into part4 values ('a', 10);
+ERROR: new row for relation "part4" violates partition constraint
+DETAIL: Failing row contains (a, 10).
+-- ok
+insert into part4 values ('b', 10);
+-- fail (partition key a has a NOT NULL constraint)
+insert into part1 values (null);
+ERROR: null value in column "a" violates not-null constraint
+DETAIL: Failing row contains (null, null).
+-- fail (expression key (b+0) cannot be null either)
+insert into part1 values (1);
+ERROR: new row for relation "part1" violates partition constraint
+DETAIL: Failing row contains (1, null).
+create table list_parted (
+ a text,
+ b int
+) partition by list (lower(a));
+create table part_aa_bb partition of list_parted FOR VALUES IN ('aa', 'bb');
+create table part_cc_dd partition of list_parted FOR VALUES IN ('cc', 'dd');
+create table part_null partition of list_parted FOR VALUES IN (null);
+-- fail
+insert into part_aa_bb values ('cc', 1);
+ERROR: new row for relation "part_aa_bb" violates partition constraint
+DETAIL: Failing row contains (cc, 1).
+insert into part_aa_bb values ('AAa', 1);
+ERROR: new row for relation "part_aa_bb" violates partition constraint
+DETAIL: Failing row contains (AAa, 1).
+insert into part_aa_bb values (null);
+ERROR: new row for relation "part_aa_bb" violates partition constraint
+DETAIL: Failing row contains (null, null).
+-- ok
+insert into part_cc_dd values ('cC', 1);
+insert into part_null values (null, 0);
+-- check the case of a multi-level partitioned table
+create table part_ee_ff partition of list_parted for values in ('ee', 'ff') partition by range (b);
+create table part_ee_ff1 partition of part_ee_ff for values from (1) to (10);
+create table part_ee_ff2 partition of part_ee_ff for values from (10) to (20);
+-- fail
+insert into part_ee_ff1 values ('EE', 11);
+ERROR: new row for relation "part_ee_ff1" violates partition constraint
+DETAIL: Failing row contains (EE, 11).
+-- fail (even the parent's, i.e., part_ee_ff's, partition constraint applies)
+insert into part_ee_ff1 values ('cc', 1);
+ERROR: new row for relation "part_ee_ff1" violates partition constraint
+DETAIL: Failing row contains (cc, 1).
+-- ok
+insert into part_ee_ff1 values ('ff', 1);
+insert into part_ee_ff2 values ('ff', 11);
+-- Check tuple routing for partitioned tables
+-- fail
+insert into range_parted values ('a', 0);
+ERROR: no partition of relation "range_parted" found for row
+DETAIL: Failing row contains (a, 0).
+-- ok
+insert into range_parted values ('a', 1);
+insert into range_parted values ('a', 10);
+-- fail
+insert into range_parted values ('a', 20);
+ERROR: no partition of relation "range_parted" found for row
+DETAIL: Failing row contains (a, 20).
+-- ok
+insert into range_parted values ('b', 1);
+insert into range_parted values ('b', 10);
+-- fail (partition key (b+0) is null)
+insert into range_parted values ('a');
+ERROR: range partition key of row contains null
+select tableoid::regclass, * from range_parted;
+ tableoid | a | b
+----------+---+----
+ part1 | a | 1
+ part1 | a | 1
+ part2 | a | 10
+ part3 | b | 1
+ part4 | b | 10
+ part4 | b | 10
+(6 rows)
+
+-- ok
+insert into list_parted values (null, 1);
+insert into list_parted (a) values ('aA');
+-- fail (no partition of part_ee_ff found in either case)
+insert into list_parted values ('EE', 0);
+ERROR: no partition of relation "part_ee_ff" found for row
+DETAIL: Failing row contains (EE, 0).
+insert into part_ee_ff values ('EE', 0);
+ERROR: no partition of relation "part_ee_ff" found for row
+DETAIL: Failing row contains (EE, 0).
+-- ok
+insert into list_parted values ('EE', 1);
+insert into part_ee_ff values ('EE', 10);
+select tableoid::regclass, * from list_parted;
+ tableoid | a | b
+-------------+----+----
+ part_aa_bb | aA |
+ part_cc_dd | cC | 1
+ part_null | | 0
+ part_null | | 1
+ part_ee_ff1 | ff | 1
+ part_ee_ff1 | EE | 1
+ part_ee_ff2 | ff | 11
+ part_ee_ff2 | EE | 10
+(8 rows)
+
+-- cleanup
+drop table range_parted cascade;
+NOTICE: drop cascades to 4 other objects
+DETAIL: drop cascades to table part1
+drop cascades to table part2
+drop cascades to table part3
+drop cascades to table part4
+drop table list_parted cascade;
+NOTICE: drop cascades to 6 other objects
+DETAIL: drop cascades to table part_aa_bb
+drop cascades to table part_cc_dd
+drop cascades to table part_null
+drop cascades to table part_ee_ff
+drop cascades to table part_ee_ff1
+drop cascades to table part_ee_ff2
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index b1ebcf60d2..8fa929a6aa 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -120,6 +120,7 @@ pg_namespace|t
pg_opclass|t
pg_operator|t
pg_opfamily|t
+pg_partitioned_table|t
pg_pltemplate|t
pg_policy|t
pg_proc|t
diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out
index 609899e1f7..a1e9255450 100644
--- a/src/test/regress/expected/update.out
+++ b/src/test/regress/expected/update.out
@@ -198,3 +198,30 @@ INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a)
DROP TABLE update_test;
DROP TABLE upsert_test;
+-- an update to a partition should check the partition bound constraint for the new tuple
+create table range_parted (
+ a text,
+ b int
+) partition by range (a, b);
+create table part_a_1_a_10 partition of range_parted for values from ('a', 1) to ('a', 10);
+create table part_a_10_a_20 partition of range_parted for values from ('a', 10) to ('a', 20);
+create table part_b_1_b_10 partition of range_parted for values from ('b', 1) to ('b', 10);
+create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20);
+insert into part_a_1_a_10 values ('a', 1);
+insert into part_b_10_b_20 values ('b', 10);
+-- fail
+update part_a_1_a_10 set a = 'b' where a = 'a';
+ERROR: new row for relation "part_a_1_a_10" violates partition constraint
+DETAIL: Failing row contains (b, 1).
+update range_parted set b = b - 1 where b = 10;
+ERROR: new row for relation "part_b_10_b_20" violates partition constraint
+DETAIL: Failing row contains (b, 9).
+-- ok
+update range_parted set b = b + 1 where b = 10;
+-- cleanup
+drop table range_parted cascade;
+NOTICE: drop cascades to 4 other objects
+DETAIL: drop cascades to table part_a_1_a_10
+drop cascades to table part_a_10_a_20
+drop cascades to table part_b_1_b_10
+drop cascades to table part_b_10_b_20
diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql
index c8eed3ec64..c4ed69304f 100644
--- a/src/test/regress/sql/alter_table.sql
+++ b/src/test/regress/sql/alter_table.sql
@@ -1875,3 +1875,297 @@ ALTER TABLE test_add_column
ADD COLUMN c4 integer;
\d test_add_column
DROP TABLE test_add_column;
+
+-- unsupported constraint types for partitioned tables
+CREATE TABLE partitioned (
+ a int,
+ b int
+) PARTITION BY RANGE (a, (a+b+1));
+ALTER TABLE partitioned ADD UNIQUE (a);
+ALTER TABLE partitioned ADD PRIMARY KEY (a);
+ALTER TABLE partitioned ADD FOREIGN KEY (a) REFERENCES blah;
+ALTER TABLE partitioned ADD EXCLUDE USING gist (a WITH &&);
+
+-- cannot drop column that is part of the partition key
+ALTER TABLE partitioned DROP COLUMN a;
+ALTER TABLE partitioned ALTER COLUMN a TYPE char(5);
+ALTER TABLE partitioned DROP COLUMN b;
+ALTER TABLE partitioned ALTER COLUMN b TYPE char(5);
+
+-- cannot drop NOT NULL on columns in the range partition key
+ALTER TABLE partitioned ALTER COLUMN a DROP NOT NULL;
+
+-- partitioned table cannot participate in regular inheritance
+CREATE TABLE foo (
+ a int,
+ b int
+);
+ALTER TABLE partitioned INHERIT foo;
+ALTER TABLE foo INHERIT partitioned;
+
+-- cannot add NO INHERIT constraint to partitioned tables
+ALTER TABLE partitioned ADD CONSTRAINT chk_a CHECK (a > 0) NO INHERIT;
+
+DROP TABLE partitioned, foo;
+
+--
+-- ATTACH PARTITION
+--
+
+-- check that target table is partitioned
+CREATE TABLE unparted (
+ a int
+);
+CREATE TABLE fail_part (like unparted);
+ALTER TABLE unparted ATTACH PARTITION fail_part FOR VALUES IN ('a');
+DROP TABLE unparted, fail_part;
+
+-- check that partition bound is compatible
+CREATE TABLE list_parted (
+ a int NOT NULL,
+ b char(2) COLLATE "en_US",
+ CONSTRAINT check_a CHECK (a > 0)
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part (LIKE list_parted);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES FROM (1) TO (10);
+DROP TABLE fail_part;
+
+-- check that the table being attached exists
+ALTER TABLE list_parted ATTACH PARTITION nonexistant FOR VALUES IN (1);
+
+-- check ownership of the source table
+CREATE ROLE regress_test_me;
+CREATE ROLE regress_test_not_me;
+CREATE TABLE not_owned_by_me (LIKE list_parted);
+ALTER TABLE not_owned_by_me OWNER TO regress_test_not_me;
+SET SESSION AUTHORIZATION regress_test_me;
+CREATE TABLE owned_by_me (
+ a int
+) PARTITION BY LIST (a);
+ALTER TABLE owned_by_me ATTACH PARTITION not_owned_by_me FOR VALUES IN (1);
+RESET SESSION AUTHORIZATION;
+DROP TABLE owned_by_me, not_owned_by_me;
+DROP ROLE regress_test_not_me;
+DROP ROLE regress_test_me;
+
+-- check that the table being attached is not part of regular inheritance
+CREATE TABLE parent (LIKE list_parted);
+CREATE TABLE child () INHERITS (parent);
+ALTER TABLE list_parted ATTACH PARTITION child FOR VALUES IN (1);
+ALTER TABLE list_parted ATTACH PARTITION parent FOR VALUES IN (1);
+DROP TABLE parent CASCADE;
+
+-- check any TEMP-ness
+CREATE TEMP TABLE temp_parted (a int) PARTITION BY LIST (a);
+CREATE TABLE perm_part (a int);
+ALTER TABLE temp_parted ATTACH PARTITION perm_part FOR VALUES IN (1);
+DROP TABLE temp_parted, perm_part;
+
+-- check that the table being attached is not a typed table
+CREATE TYPE mytype AS (a int);
+CREATE TABLE fail_part OF mytype;
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+DROP TYPE mytype CASCADE;
+
+-- check existence (or non-existence) of oid column
+ALTER TABLE list_parted SET WITH OIDS;
+CREATE TABLE fail_part (a int);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+
+ALTER TABLE list_parted SET WITHOUT OIDS;
+ALTER TABLE fail_part SET WITH OIDS;
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+DROP TABLE fail_part;
+
+-- check that the table being attached has only columns present in the parent
+CREATE TABLE fail_part (like list_parted, c int);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+DROP TABLE fail_part;
+
+-- check that the table being attached has every column of the parent
+CREATE TABLE fail_part (a int NOT NULL);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+DROP TABLE fail_part;
+
+-- check that columns match in type, collation and NOT NULL status
+CREATE TABLE fail_part (
+ b char(3),
+ a int NOT NULL
+);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+ALTER TABLE fail_part ALTER b TYPE char (2) COLLATE "en_CA";
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+DROP TABLE fail_part;
+
+-- check that the table being attached has all constraints of the parent
+CREATE TABLE fail_part (
+ b char(2) COLLATE "en_US",
+ a int NOT NULL
+);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+
+-- check that the constraint matches in definition with parent's constraint
+ALTER TABLE fail_part ADD CONSTRAINT check_a CHECK (a >= 0);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+DROP TABLE fail_part;
+
+-- check the attributes and constraints after partition is attached
+CREATE TABLE part_1 (
+ a int NOT NULL,
+ b char(2) COLLATE "en_US",
+ CONSTRAINT check_a CHECK (a > 0)
+);
+ALTER TABLE list_parted ATTACH PARTITION part_1 FOR VALUES IN (1);
+-- attislocal and conislocal are always false for merged attributes and constraints respectively.
+SELECT attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_1'::regclass AND attnum > 0;
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_1'::regclass AND conname = 'check_a';
+
+-- check that the new partition won't overlap with an existing partition
+CREATE TABLE fail_part (LIKE part_1 INCLUDING CONSTRAINTS);
+ALTER TABLE list_parted ATTACH PARTITION fail_part FOR VALUES IN (1);
+
+-- check validation when attaching list partitions
+CREATE TABLE list_parted2 (
+ a int,
+ b char
+) PARTITION BY LIST (a);
+
+-- check that violating rows are correctly reported
+CREATE TABLE part_2 (LIKE list_parted2);
+INSERT INTO part_2 VALUES (3, 'a');
+ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
+
+-- should be ok after deleting the bad row
+DELETE FROM part_2;
+ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
+
+-- adding constraints that describe the desired partition constraint
+-- (or more restrictive) will help skip the validation scan
+CREATE TABLE part_3_4 (
+ LIKE list_parted2,
+ CONSTRAINT check_a CHECK (a IN (3))
+);
+
+-- however, if a list partition does not accept nulls, there should be
+-- an explicit NOT NULL constraint on the partition key column for the
+-- validation scan to be skipped.
+ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);
+
+-- adding a NOT NULL constraint will cause the scan to be skipped
+ALTER TABLE list_parted2 DETACH PARTITION part_3_4;
+ALTER TABLE part_3_4 ALTER a SET NOT NULL;
+ALTER TABLE list_parted2 ATTACH PARTITION part_3_4 FOR VALUES IN (3, 4);
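The NOT NULL requirement above follows from the shape of the implicit partition constraint: for a list partition that does not accept nulls, the constraint requires the key to be non-null as well as among the listed values, and CHECK (a IN (3)) alone cannot prove the non-null half, because a CHECK constraint passes when its expression evaluates to NULL. A rough sketch, assuming the generated constraint has this form (the exact expression is an implementation detail):

    -- implicit constraint for FOR VALUES IN (3, 4), roughly:
    --   a IS NOT NULL AND a IN (3, 4)
    -- a hypothetical single constraint covering both halves would also
    -- let the validation scan be skipped:
    alter table part_3_4 add constraint check_a_nonnull
        check (a is not null and a in (3, 4));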
+
+
+-- check validation when attaching range partitions
+CREATE TABLE range_parted (
+ a int,
+ b int
+) PARTITION BY RANGE (a, b);
+
+-- check that violating rows are correctly reported
+CREATE TABLE part1 (
+ a int NOT NULL CHECK (a = 1),
+ b int NOT NULL CHECK (b >= 1 AND b <= 10)
+);
+INSERT INTO part1 VALUES (1, 10);
+-- Remember the TO bound is exclusive
+ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10);
+
+-- should be ok after deleting the bad row
+DELETE FROM part1;
+ALTER TABLE range_parted ATTACH PARTITION part1 FOR VALUES FROM (1, 1) TO (1, 10);
+
+-- adding constraints that describe the desired partition constraint
+-- (or more restrictive) will help skip the validation scan
+CREATE TABLE part2 (
+ a int NOT NULL CHECK (a = 1),
+ b int NOT NULL CHECK (b >= 10 AND b < 18)
+);
+ALTER TABLE range_parted ATTACH PARTITION part2 FOR VALUES FROM (1, 10) TO (1, 20);
+
+-- check that leaf partitions are scanned when attaching a partitioned
+-- table
+CREATE TABLE part_5 (
+ LIKE list_parted2
+) PARTITION BY LIST (b);
+
+-- check that violating rows are correctly reported
+CREATE TABLE part_5_a PARTITION OF part_5 FOR VALUES IN ('a');
+INSERT INTO part_5_a (a, b) VALUES (6, 'a');
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+
+-- delete the offending row and also add a constraint to skip the scan
+DELETE FROM part_5_a WHERE a NOT IN (3);
+ALTER TABLE part_5 ADD CONSTRAINT check_a CHECK (a IN (5)), ALTER a SET NOT NULL;
+ALTER TABLE list_parted2 ATTACH PARTITION part_5 FOR VALUES IN (5);
+
+
+-- check that the table being attached is not already a partition
+ALTER TABLE list_parted2 ATTACH PARTITION part_2 FOR VALUES IN (2);
+
+-- check that circular inheritance is not allowed
+ALTER TABLE part_5 ATTACH PARTITION list_parted2 FOR VALUES IN ('b');
+ALTER TABLE list_parted2 ATTACH PARTITION list_parted2 FOR VALUES IN (0);
+
+--
+-- DETACH PARTITION
+--
+
+-- check that the partition being detached exists at all
+ALTER TABLE list_parted2 DETACH PARTITION part_4;
+
+-- check that the partition being detached is actually a partition of the parent
+CREATE TABLE not_a_part (a int);
+ALTER TABLE list_parted2 DETACH PARTITION not_a_part;
+ALTER TABLE list_parted2 DETACH PARTITION part_1;
+
+-- check that, after being detached, attinhcount/coninhcount is dropped to 0 and
+-- attislocal/conislocal is set to true
+ALTER TABLE list_parted2 DETACH PARTITION part_3_4;
+SELECT attinhcount, attislocal FROM pg_attribute WHERE attrelid = 'part_3_4'::regclass AND attnum > 0;
+SELECT coninhcount, conislocal FROM pg_constraint WHERE conrelid = 'part_3_4'::regclass AND conname = 'check_a';
+DROP TABLE part_3_4;
+
+-- Check ALTER TABLE commands for partitioned tables and partitions
+
+-- cannot add/drop column to/from *only* the parent
+ALTER TABLE ONLY list_parted2 ADD COLUMN c int;
+ALTER TABLE ONLY list_parted2 DROP COLUMN b;
+
+-- cannot add a column to partition or drop an inherited one
+ALTER TABLE part_2 ADD COLUMN c text;
+ALTER TABLE part_2 DROP COLUMN b;
+
+-- nor rename or alter the type of an inherited column
+ALTER TABLE part_2 RENAME COLUMN b to c;
+ALTER TABLE part_2 ALTER COLUMN b TYPE text;
+
+-- cannot add NOT NULL or check constraints to *only* the parent (ie, non-inherited)
+ALTER TABLE ONLY list_parted2 ALTER b SET NOT NULL;
+ALTER TABLE ONLY list_parted2 add constraint check_b check (b <> 'zz');
+ALTER TABLE list_parted2 add constraint check_b check (b <> 'zz') NO INHERIT;
+
+-- cannot drop inherited NOT NULL or check constraints from partition
+ALTER TABLE list_parted2 ALTER b SET NOT NULL, ADD CONSTRAINT check_a2 CHECK (a > 0);
+ALTER TABLE part_2 ALTER b DROP NOT NULL;
+ALTER TABLE part_2 DROP CONSTRAINT check_a2;
+
+-- cannot drop NOT NULL or check constraints from *only* the parent
+ALTER TABLE ONLY list_parted2 ALTER a DROP NOT NULL;
+ALTER TABLE ONLY list_parted2 DROP CONSTRAINT check_a2;
+
+-- check that a partition cannot participate in regular inheritance
+CREATE TABLE inh_test () INHERITS (part_2);
+CREATE TABLE inh_test (LIKE part_2);
+ALTER TABLE inh_test INHERIT part_2;
+ALTER TABLE part_2 INHERIT inh_test;
+
+-- cannot drop or alter type of partition key columns of lower level
+-- partitioned tables; for example, part_5, which is list_parted2's
+-- partition, is partitioned on b.
+ALTER TABLE list_parted2 DROP COLUMN b;
+ALTER TABLE list_parted2 ALTER COLUMN b TYPE text;
+
+-- cleanup
+DROP TABLE list_parted, list_parted2, range_parted CASCADE;
diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql
index 78bdc8bf5e..69848e3094 100644
--- a/src/test/regress/sql/create_table.sql
+++ b/src/test/regress/sql/create_table.sql
@@ -269,3 +269,318 @@ DROP TABLE as_select1;
-- check that the oid column is added before the primary key is checked
CREATE TABLE oid_pk (f1 INT, PRIMARY KEY(oid)) WITH OIDS;
DROP TABLE oid_pk;
+
+--
+-- Partitioned tables
+--
+
+-- cannot combine INHERITS and PARTITION BY (although the grammar allows it)
+CREATE TABLE partitioned (
+ a int
+) INHERITS (some_table) PARTITION BY LIST (a);
+
+-- cannot use more than one column as the partition key for a list partitioned table
+CREATE TABLE partitioned (
+ a1 int,
+ a2 int
+) PARTITION BY LIST (a1, a2); -- fail
+
+-- unsupported constraint type for partitioned tables
+CREATE TABLE partitioned (
+ a int PRIMARY KEY
+) PARTITION BY RANGE (a);
+
+CREATE TABLE pkrel (
+ a int PRIMARY KEY
+);
+CREATE TABLE partitioned (
+ a int REFERENCES pkrel(a)
+) PARTITION BY RANGE (a);
+DROP TABLE pkrel;
+
+CREATE TABLE partitioned (
+ a int UNIQUE
+) PARTITION BY RANGE (a);
+
+CREATE TABLE partitioned (
+ a int,
+ EXCLUDE USING gist (a WITH &&)
+) PARTITION BY RANGE (a);
+
+-- prevent column from being used twice in the partition key
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (a, a);
+
+-- prevent using prohibited expressions in the key
+CREATE FUNCTION retset (a int) RETURNS SETOF int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE;
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (retset(a));
+DROP FUNCTION retset(int);
+
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE ((avg(a)));
+
+CREATE TABLE partitioned (
+ a int,
+ b int
+) PARTITION BY RANGE ((avg(a) OVER (PARTITION BY b)));
+
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY LIST ((a LIKE (SELECT 1)));
+
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (('a'));
+
+CREATE FUNCTION const_func () RETURNS int AS $$ SELECT 1; $$ LANGUAGE SQL IMMUTABLE;
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (const_func());
+DROP FUNCTION const_func();
+
+-- only accept "list" and "range" as partitioning strategy
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY HASH (a);
+
+-- specified column must be present in the table
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (b);
+
+-- cannot use system columns in partition key
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (xmin);
+
+-- functions in key must be immutable
+CREATE FUNCTION immut_func (a int) RETURNS int AS $$ SELECT a + random()::int; $$ LANGUAGE SQL;
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE (immut_func(a));
+DROP FUNCTION immut_func(int);
+
+-- cannot contain whole-row references
+CREATE TABLE partitioned (
+ a int
+) PARTITION BY RANGE ((partitioned));
+
+-- prevent using columns of unsupported types in key (type must have a btree operator class)
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY LIST (a);
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY LIST (a point_ops);
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY RANGE (a);
+CREATE TABLE partitioned (
+ a point
+) PARTITION BY RANGE (a point_ops);
+
+-- cannot add NO INHERIT constraints to partitioned tables
+CREATE TABLE partitioned (
+ a int,
+ CONSTRAINT check_a CHECK (a > 0) NO INHERIT
+) PARTITION BY RANGE (a);
+
+-- some checks after successful creation of a partitioned table
+CREATE FUNCTION plusone(a int) RETURNS INT AS $$ SELECT a+1; $$ LANGUAGE SQL;
+
+CREATE TABLE partitioned (
+ a int,
+ b int,
+ c text,
+ d text
+) PARTITION BY RANGE (a oid_ops, plusone(b), c collate "default", d collate "en_US");
+
+-- check relkind
+SELECT relkind FROM pg_class WHERE relname = 'partitioned';
+
+-- check that range partition key columns are marked NOT NULL
+SELECT attname, attnotnull FROM pg_attribute WHERE attrelid = 'partitioned'::regclass AND attnum > 0;
+
+-- prevent a function referenced in partition key from being dropped
+DROP FUNCTION plusone(int);
+
+-- partitioned table cannot participate in regular inheritance
+CREATE TABLE partitioned2 (
+ a int
+) PARTITION BY LIST ((a+1));
+CREATE TABLE fail () INHERITS (partitioned2);
+
+-- Partition key in describe output
+\d partitioned
+\d partitioned2
+
+DROP TABLE partitioned, partitioned2;
+
+--
+-- Partitions
+--
+
+-- check partition bound syntax
+
+CREATE TABLE list_parted (
+ a int
+) PARTITION BY LIST (a);
+-- syntax allows only string literal, numeric literal and null to be
+-- specified for a partition bound value
+CREATE TABLE part_1 PARTITION OF list_parted FOR VALUES IN ('1');
+CREATE TABLE part_2 PARTITION OF list_parted FOR VALUES IN (2);
+CREATE TABLE part_null PARTITION OF list_parted FOR VALUES IN (null);
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN (int '1');
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ('1'::int);
+
+-- syntax does not allow empty list of values for list partitions
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES IN ();
+-- trying to specify range for list partitioned table
+CREATE TABLE fail_part PARTITION OF list_parted FOR VALUES FROM (1) TO (2);
+
+-- specified literal can't be cast to the partition column data type
+CREATE TABLE bools (
+ a bool
+) PARTITION BY LIST (a);
+CREATE TABLE bools_true PARTITION OF bools FOR VALUES IN (1);
+DROP TABLE bools;
+
+CREATE TABLE range_parted (
+ a date
+) PARTITION BY RANGE (a);
+
+-- trying to specify list for range partitioned table
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES IN ('a');
+-- each of the start and end bounds must have the same number of values as
+-- the length of the partition key
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a', 1) TO ('z');
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM ('a') TO ('z', 1);
+
+-- cannot specify null values in range bounds
+CREATE TABLE fail_part PARTITION OF range_parted FOR VALUES FROM (null) TO (unbounded);
+
+-- check if compatible with the specified parent
+
+-- cannot create as partition of a non-partitioned table
+CREATE TABLE unparted (
+ a int
+);
+CREATE TABLE fail_part PARTITION OF unparted FOR VALUES IN ('a');
+DROP TABLE unparted;
+
+-- cannot create a permanent rel as partition of a temp rel
+CREATE TEMP TABLE temp_parted (
+ a int
+) PARTITION BY LIST (a);
+CREATE TABLE fail_part PARTITION OF temp_parted FOR VALUES IN ('a');
+DROP TABLE temp_parted;
+
+-- cannot create a table with oids as partition of table without oids
+CREATE TABLE no_oids_parted (
+ a int
+) PARTITION BY RANGE (a) WITHOUT OIDS;
+CREATE TABLE fail_part PARTITION OF no_oids_parted FOR VALUES FROM (1) TO (10) WITH OIDS;
+DROP TABLE no_oids_parted;
+
+-- likewise, the reverse is also true
+CREATE TABLE oids_parted (
+ a int
+) PARTITION BY RANGE (a) WITH OIDS;
+CREATE TABLE fail_part PARTITION OF oids_parted FOR VALUES FROM (1) TO (10) WITHOUT OIDS;
+DROP TABLE oids_parted;
+
+-- check for partition bound overlap and other invalid specifications
+
+CREATE TABLE list_parted2 (
+ a varchar
+) PARTITION BY LIST (a);
+CREATE TABLE part_null_z PARTITION OF list_parted2 FOR VALUES IN (null, 'z');
+CREATE TABLE part_ab PARTITION OF list_parted2 FOR VALUES IN ('a', 'b');
+
+CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN (null);
+CREATE TABLE fail_part PARTITION OF list_parted2 FOR VALUES IN ('b', 'c');
+
+CREATE TABLE range_parted2 (
+ a int
+) PARTITION BY RANGE (a);
+
+-- trying to create range partition with empty range
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (0);
+-- note that the range '[1, 1)' has no elements
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (1) TO (1);
+
+CREATE TABLE part0 PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (1);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (unbounded) TO (2);
+CREATE TABLE part1 PARTITION OF range_parted2 FOR VALUES FROM (1) TO (10);
+CREATE TABLE fail_part PARTITION OF range_parted2 FOR VALUES FROM (9) TO (unbounded);
+
+-- now check for multi-column range partition key
+CREATE TABLE range_parted3 (
+ a int,
+ b int
+) PARTITION BY RANGE (a, (b+1));
+
+CREATE TABLE part00 PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, unbounded);
+CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (0, unbounded) TO (0, 1);
+
+CREATE TABLE part10 PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, 1);
+CREATE TABLE part11 PARTITION OF range_parted3 FOR VALUES FROM (1, 1) TO (1, 10);
+CREATE TABLE part12 PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, unbounded);
+CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, 10) TO (1, 20);
+
+-- cannot create a partition that says column b is allowed to range
+-- from -infinity to +infinity, while there exist partitions that have
+-- more specific ranges
+CREATE TABLE fail_part PARTITION OF range_parted3 FOR VALUES FROM (1, unbounded) TO (1, unbounded);
+
+-- check schema propagation from parent
+
+CREATE TABLE parted (
+ a text,
+ b int NOT NULL DEFAULT 0,
+ CONSTRAINT check_a CHECK (length(a) > 0)
+) PARTITION BY LIST (a);
+
+CREATE TABLE part_a PARTITION OF parted FOR VALUES IN ('a');
+
+-- only inherited attributes (never local ones)
+SELECT attname, attislocal, attinhcount FROM pg_attribute WHERE attrelid = 'part_a'::regclass and attnum > 0;
+
+-- able to specify column default, column constraint, and table constraint
+CREATE TABLE part_b PARTITION OF parted (
+ b NOT NULL DEFAULT 1 CHECK (b >= 0),
+ CONSTRAINT check_a CHECK (length(a) > 0)
+) FOR VALUES IN ('b');
+-- conislocal should be false for any merged constraints
+SELECT conislocal, coninhcount FROM pg_constraint WHERE conrelid = 'part_b'::regclass AND conname = 'check_a';
+
+-- specify PARTITION BY for a partition
+CREATE TABLE fail_part_col_not_found PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE (c);
+CREATE TABLE part_c PARTITION OF parted FOR VALUES IN ('c') PARTITION BY RANGE ((b));
+
+-- create a level-2 partition
+CREATE TABLE part_c_1_10 PARTITION OF part_c FOR VALUES FROM (1) TO (10);
+
+-- Partition bound in describe output
+\d part_b
+
+-- Both partition bound and partition key in describe output
+\d part_c
+
+-- Show partition count in the parent's describe output
+-- Tempted to include \d+ output listing partitions with bound info but
+-- output could vary depending on the order in which partition oids are
+-- returned.
+\d parted
+
+-- partitions cannot be dropped directly
+DROP TABLE part_a;
+
+-- need to specify CASCADE to drop partitions along with the parent
+DROP TABLE parted;
+
+DROP TABLE parted, list_parted, range_parted, list_parted2, range_parted2, range_parted3 CASCADE;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index f45aab1ac6..e22a14ebda 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -536,3 +536,55 @@ FROM generate_series(1, 3) g(i);
reset enable_seqscan;
reset enable_indexscan;
reset enable_bitmapscan;
+
+--
+-- Check that constraint exclusion works correctly with partitions using
+-- implicit constraints generated from the partition bound information.
+--
+create table list_parted (
+ a varchar
+) partition by list (a);
+create table part_ab_cd partition of list_parted for values in ('ab', 'cd');
+create table part_ef_gh partition of list_parted for values in ('ef', 'gh');
+create table part_null_xy partition of list_parted for values in (null, 'xy');
+
+explain (costs off) select * from list_parted;
+explain (costs off) select * from list_parted where a is null;
+explain (costs off) select * from list_parted where a is not null;
+explain (costs off) select * from list_parted where a in ('ab', 'cd', 'ef');
+explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd');
+explain (costs off) select * from list_parted where a = 'ab';
+
+create table range_list_parted (
+ a int,
+ b char(2)
+) partition by range (a);
+create table part_1_10 partition of range_list_parted for values from (1) to (10) partition by list (b);
+create table part_1_10_ab partition of part_1_10 for values in ('ab');
+create table part_1_10_cd partition of part_1_10 for values in ('cd');
+create table part_10_20 partition of range_list_parted for values from (10) to (20) partition by list (b);
+create table part_10_20_ab partition of part_10_20 for values in ('ab');
+create table part_10_20_cd partition of part_10_20 for values in ('cd');
+create table part_21_30 partition of range_list_parted for values from (21) to (30) partition by list (b);
+create table part_21_30_ab partition of part_21_30 for values in ('ab');
+create table part_21_30_cd partition of part_21_30 for values in ('cd');
+create table part_40_inf partition of range_list_parted for values from (40) to (unbounded) partition by list (b);
+create table part_40_inf_ab partition of part_40_inf for values in ('ab');
+create table part_40_inf_cd partition of part_40_inf for values in ('cd');
+create table part_40_inf_null partition of part_40_inf for values in (null);
+
+explain (costs off) select * from range_list_parted;
+explain (costs off) select * from range_list_parted where a = 5;
+explain (costs off) select * from range_list_parted where b = 'ab';
+explain (costs off) select * from range_list_parted where a between 3 and 23 and b in ('ab');
+
+/* Should select no rows because range partition key cannot be null */
+explain (costs off) select * from range_list_parted where a is null;
+
+/* Should only select rows from the null-accepting partition */
+explain (costs off) select * from range_list_parted where b is null;
+explain (costs off) select * from range_list_parted where a is not null and a < 67;
+explain (costs off) select * from range_list_parted where a >= 30;
+
+drop table list_parted cascade;
+drop table range_list_parted cascade;
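Since this is the sql/ side of the test, the plans themselves live in the expected/ file; a sketch of the pruning one would expect for the simple equality case, assuming a PG10-era planner with constraint_exclusion at its default setting of 'partition':

    explain (costs off) select * from list_parted where a = 'ab';
    -- expected shape: an Append over a single Seq Scan on part_ab_cd;
    -- part_ef_gh and part_null_xy should be excluded because a = 'ab'
    -- contradicts their implicit list constraints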
diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql
index 7924d5d46d..846bb5897a 100644
--- a/src/test/regress/sql/insert.sql
+++ b/src/test/regress/sql/insert.sql
@@ -84,3 +84,89 @@ create rule irule3 as on insert to inserttest2 do also
drop table inserttest2;
drop table inserttest;
drop type insert_test_type;
+
+-- direct partition inserts should check partition bound constraint
+create table range_parted (
+ a text,
+ b int
+) partition by range (a, (b+0));
+create table part1 partition of range_parted for values from ('a', 1) to ('a', 10);
+create table part2 partition of range_parted for values from ('a', 10) to ('a', 20);
+create table part3 partition of range_parted for values from ('b', 1) to ('b', 10);
+create table part4 partition of range_parted for values from ('b', 10) to ('b', 20);
+
+-- fail
+insert into part1 values ('a', 11);
+insert into part1 values ('b', 1);
+-- ok
+insert into part1 values ('a', 1);
+-- fail
+insert into part4 values ('b', 21);
+insert into part4 values ('a', 10);
+-- ok
+insert into part4 values ('b', 10);
+
+-- fail (partition key a has a NOT NULL constraint)
+insert into part1 values (null);
+-- fail (expression key (b+0) cannot be null either)
+insert into part1 values (1);
+
+create table list_parted (
+ a text,
+ b int
+) partition by list (lower(a));
+create table part_aa_bb partition of list_parted FOR VALUES IN ('aa', 'bb');
+create table part_cc_dd partition of list_parted FOR VALUES IN ('cc', 'dd');
+create table part_null partition of list_parted FOR VALUES IN (null);
+
+-- fail
+insert into part_aa_bb values ('cc', 1);
+insert into part_aa_bb values ('AAa', 1);
+insert into part_aa_bb values (null);
+-- ok
+insert into part_cc_dd values ('cC', 1);
+insert into part_null values (null, 0);
+
+-- check the case of a multi-level partitioned table
+create table part_ee_ff partition of list_parted for values in ('ee', 'ff') partition by range (b);
+create table part_ee_ff1 partition of part_ee_ff for values from (1) to (10);
+create table part_ee_ff2 partition of part_ee_ff for values from (10) to (20);
+
+-- fail
+insert into part_ee_ff1 values ('EE', 11);
+-- fail (even the parent's, i.e., part_ee_ff's, partition constraint applies)
+insert into part_ee_ff1 values ('cc', 1);
+-- ok
+insert into part_ee_ff1 values ('ff', 1);
+insert into part_ee_ff2 values ('ff', 11);
+
+-- Check tuple routing for partitioned tables
+
+-- fail
+insert into range_parted values ('a', 0);
+-- ok
+insert into range_parted values ('a', 1);
+insert into range_parted values ('a', 10);
+-- fail
+insert into range_parted values ('a', 20);
+-- ok
+insert into range_parted values ('b', 1);
+insert into range_parted values ('b', 10);
+-- fail (partition key (b+0) is null)
+insert into range_parted values ('a');
+select tableoid::regclass, * from range_parted;
+
+-- ok
+insert into list_parted values (null, 1);
+insert into list_parted (a) values ('aA');
+-- fail (no partition of part_ee_ff is found in either case)
+insert into list_parted values ('EE', 0);
+insert into part_ee_ff values ('EE', 0);
+-- ok
+insert into list_parted values ('EE', 1);
+insert into part_ee_ff values ('EE', 10);
+select tableoid::regclass, * from list_parted;
+
+-- cleanup
+drop table range_parted cascade;
+drop table list_parted cascade;
diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql
index ad58273b38..d7721ed376 100644
--- a/src/test/regress/sql/update.sql
+++ b/src/test/regress/sql/update.sql
@@ -106,3 +106,24 @@ INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a)
DROP TABLE update_test;
DROP TABLE upsert_test;
+
+-- update to a partition should check partition bound constraint for the new tuple
+create table range_parted (
+ a text,
+ b int
+) partition by range (a, b);
+create table part_a_1_a_10 partition of range_parted for values from ('a', 1) to ('a', 10);
+create table part_a_10_a_20 partition of range_parted for values from ('a', 10) to ('a', 20);
+create table part_b_1_b_10 partition of range_parted for values from ('b', 1) to ('b', 10);
+create table part_b_10_b_20 partition of range_parted for values from ('b', 10) to ('b', 20);
+insert into part_a_1_a_10 values ('a', 1);
+insert into part_b_10_b_20 values ('b', 10);
+
+-- fail
+update part_a_1_a_10 set a = 'b' where a = 'a';
+update range_parted set b = b - 1 where b = 10;
+-- ok
+update range_parted set b = b + 1 where b = 10;
+
+-- cleanup
+drop table range_parted cascade;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index c680216e8e..c8cc8f7568 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1469,6 +1469,11 @@ ParsedText
ParsedWord
ParserSetupHook
ParserState
+PartitionBoundInfoData
+PartitionBoundSpec
+PartitionCmd
+PartitionListValue
+PartitionRangeBound
Path
PathClauseUsage
PathCostComparison
@@ -1660,6 +1665,7 @@ RWConflictPoolHeader
Range
RangeBound
RangeBox
+RangeDatumContent
RangeFunction
RangeIOData
RangeQueryClause
--
cgit v1.2.3
From a924c327e2793d2025b19e18de7917110dc8afd8 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 8 Dec 2016 12:00:00 -0500
Subject: Add support for temporary replication slots
This allows creating temporary replication slots that are removed
automatically at the end of the session or on error.
From: Petr Jelinek
---
contrib/test_decoding/Makefile | 2 +-
contrib/test_decoding/expected/ddl.out | 4 +-
contrib/test_decoding/expected/slot.out | 58 +++++++++++++++++++++++++++
contrib/test_decoding/sql/slot.sql | 20 ++++++++++
doc/src/sgml/func.sgml | 16 ++++++--
doc/src/sgml/protocol.sgml | 13 ++++++-
src/backend/catalog/system_views.sql | 11 ++++++
src/backend/replication/repl_gram.y | 22 +++++++----
src/backend/replication/repl_scanner.l | 1 +
src/backend/replication/slot.c | 69 ++++++++++++++++++++++++++-------
src/backend/replication/slotfuncs.c | 24 ++++++++----
src/backend/replication/walsender.c | 28 +++++++------
src/backend/storage/lmgr/proc.c | 3 ++
src/backend/tcop/postgres.c | 3 ++
src/include/catalog/pg_proc.h | 6 +--
src/include/nodes/replnodes.h | 1 +
src/include/replication/slot.h | 4 +-
src/test/regress/expected/rules.out | 3 +-
18 files changed, 237 insertions(+), 51 deletions(-)
create mode 100644 contrib/test_decoding/expected/slot.out
create mode 100644 contrib/test_decoding/sql/slot.sql
(limited to 'doc/src')
diff --git a/contrib/test_decoding/Makefile b/contrib/test_decoding/Makefile
index a6641f5040..d2bc8b8350 100644
--- a/contrib/test_decoding/Makefile
+++ b/contrib/test_decoding/Makefile
@@ -39,7 +39,7 @@ submake-test_decoding:
REGRESSCHECKS=ddl xact rewrite toast permissions decoding_in_xact \
decoding_into_rel binary prepared replorigin time messages \
- spill
+ spill slot
regresscheck: | submake-regress submake-test_decoding temp-install
$(MKDIR_P) regression_output
diff --git a/contrib/test_decoding/expected/ddl.out b/contrib/test_decoding/expected/ddl.out
index a9ba615b5b..c104c4802d 100644
--- a/contrib/test_decoding/expected/ddl.out
+++ b/contrib/test_decoding/expected/ddl.out
@@ -702,7 +702,7 @@ SELECT pg_drop_replication_slot('regression_slot');
/* check that the slot is gone */
SELECT * FROM pg_replication_slots;
- slot_name | plugin | slot_type | datoid | database | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn
------------+--------+-----------+--------+----------+--------+------------+------+--------------+-------------+---------------------
+ slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn
+-----------+--------+-----------+--------+----------+-----------+--------+------------+------+--------------+-------------+---------------------
(0 rows)
diff --git a/contrib/test_decoding/expected/slot.out b/contrib/test_decoding/expected/slot.out
new file mode 100644
index 0000000000..5e6b70ba38
--- /dev/null
+++ b/contrib/test_decoding/expected/slot.out
@@ -0,0 +1,58 @@
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 'test_decoding', true);
+ ?column?
+----------
+ init
+(1 row)
+
+SELECT pg_drop_replication_slot('regression_slot_p');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding', false);
+ ?column?
+----------
+ init
+(1 row)
+
+-- reconnect to clean temp slots
+\c
+SELECT pg_drop_replication_slot('regression_slot_p');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
+-- should fail because the temporary slot was dropped automatically
+SELECT pg_drop_replication_slot('regression_slot_t');
+ERROR: replication slot "regression_slot_t" does not exist
+-- test switching between slots in a session
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot1', 'test_decoding', true);
+ ?column?
+----------
+ init
+(1 row)
+
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot2', 'test_decoding', true);
+ ?column?
+----------
+ init
+(1 row)
+
+SELECT * FROM pg_logical_slot_get_changes('regression_slot1', NULL, NULL);
+ location | xid | data
+----------+-----+------
+(0 rows)
+
+SELECT * FROM pg_logical_slot_get_changes('regression_slot2', NULL, NULL);
+ location | xid | data
+----------+-----+------
+(0 rows)
+
diff --git a/contrib/test_decoding/sql/slot.sql b/contrib/test_decoding/sql/slot.sql
new file mode 100644
index 0000000000..3b0aecd6a8
--- /dev/null
+++ b/contrib/test_decoding/sql/slot.sql
@@ -0,0 +1,20 @@
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding');
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_t', 'test_decoding', true);
+
+SELECT pg_drop_replication_slot('regression_slot_p');
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot_p', 'test_decoding', false);
+
+-- reconnect to clean temp slots
+\c
+
+SELECT pg_drop_replication_slot('regression_slot_p');
+
+-- should fail because the temporary slot was dropped automatically
+SELECT pg_drop_replication_slot('regression_slot_t');
+
+
+-- test switching between slots in a session
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot1', 'test_decoding', true);
+SELECT 'init' FROM pg_create_logical_replication_slot('regression_slot2', 'test_decoding', true);
+SELECT * FROM pg_logical_slot_get_changes('regression_slot1', NULL, NULL);
+SELECT * FROM pg_logical_slot_get_changes('regression_slot2', NULL, NULL);
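The test above covers the logical variant; pg_create_physical_replication_slot gains the same optional temporary argument (see the system_views.sql and pg_proc.h hunks below). A minimal sketch of the physical case, with a hypothetical slot name:

    -- create a temporary physical slot, reserving WAL immediately
    SELECT 'init' FROM pg_create_physical_replication_slot('phys_tmp_slot', true, true);
    -- after a reconnect the slot is gone, so dropping it fails
    \c
    SELECT pg_drop_replication_slot('phys_tmp_slot');  -- ERROR: slot does not exist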
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index eca98dfd34..0f9c9bf129 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18465,7 +18465,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_create_physical_replication_slot
- pg_create_physical_replication_slot(slot_namename, immediately_reserve> boolean> )
+ pg_create_physical_replication_slot(slot_namename, immediately_reserve> boolean>, temporary> boolean>)
(slot_namename, xlog_positionpg_lsn)
@@ -18478,7 +18478,11 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
the LSN> is reserved on first connection from a streaming
replication client. Streaming changes from a physical slot is only
possible with the streaming-replication protocol —
- see . This function corresponds
+ see . The optional third
+ parameter, temporary>, when set to true, specifies that
+ the slot should not be permanently stored to disk and is only meant
+ for use by the current session. Temporary slots are also
+ released upon any error. This function corresponds
to the replication protocol command CREATE_REPLICATION_SLOT
... PHYSICAL.
@@ -18505,7 +18509,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
pg_create_logical_replication_slot
- pg_create_logical_replication_slot(slot_namename, pluginname)
+ pg_create_logical_replication_slot(slot_namename, pluginname, temporary> boolean>)
(slot_namename, xlog_positionpg_lsn)
@@ -18513,7 +18517,11 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
Creates a new logical (decoding) replication slot named
slot_name using the output plugin
- plugin. A call to this function has the same
+ plugin. The optional third
+ parameter, temporary>, when set to true, specifies that
+ the slot should not be permanently stored to disk and is only meant
+ for use by the current session. Temporary slots are also
+ released upon any error. A call to this function has the same
effect as the replication protocol command
CREATE_REPLICATION_SLOT ... LOGICAL.
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 50cf527427..9ba147cae5 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1434,7 +1434,7 @@ The commands accepted in walsender mode are:
- CREATE_REPLICATION_SLOTslot_name> { PHYSICAL> [ RESERVE_WAL> ] | LOGICAL> output_plugin> }
+ CREATE_REPLICATION_SLOTslot_name> [ TEMPORARY> ] { PHYSICAL> [ RESERVE_WAL> ] | LOGICAL> output_plugin> }
CREATE_REPLICATION_SLOT
@@ -1464,6 +1464,17 @@ The commands accepted in walsender mode are:
+
+ TEMPORARY>
+
+
+ Specify that this replication slot is a temporary one. Temporary
+ slots are not saved to disk and are automatically dropped on error
+ or when the session has finished.
+
+
+
+
RESERVE_WAL>
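For completeness, the walsender-level commands this syntax enables look like the following sketch, assuming a replication connection and made-up slot names (the grammar in repl_gram.y below accepts TEMPORARY before either PHYSICAL or LOGICAL):

    CREATE_REPLICATION_SLOT tmp_phys TEMPORARY PHYSICAL RESERVE_WAL
    CREATE_REPLICATION_SLOT tmp_logi TEMPORARY LOGICAL test_decoding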
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index df59d1819c..48e7c4b7f9 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -726,6 +726,7 @@ CREATE VIEW pg_replication_slots AS
L.slot_type,
L.datoid,
D.datname AS database,
+ L.temporary,
L.active,
L.active_pid,
L.xmin,
@@ -991,12 +992,22 @@ AS 'pg_logical_slot_peek_binary_changes';
CREATE OR REPLACE FUNCTION pg_create_physical_replication_slot(
IN slot_name name, IN immediately_reserve boolean DEFAULT false,
+ IN temporary boolean DEFAULT false,
OUT slot_name name, OUT xlog_position pg_lsn)
RETURNS RECORD
LANGUAGE INTERNAL
STRICT VOLATILE
AS 'pg_create_physical_replication_slot';
+CREATE OR REPLACE FUNCTION pg_create_logical_replication_slot(
+ IN slot_name name, IN plugin name,
+ IN temporary boolean DEFAULT false,
+ OUT slot_name text, OUT xlog_position pg_lsn)
+RETURNS RECORD
+LANGUAGE INTERNAL
+STRICT VOLATILE
+AS 'pg_create_logical_replication_slot';
+
CREATE OR REPLACE FUNCTION
make_interval(years int4 DEFAULT 0, months int4 DEFAULT 0, weeks int4 DEFAULT 0,
days int4 DEFAULT 0, hours int4 DEFAULT 0, mins int4 DEFAULT 0,
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index fd0fa6dde0..e75516c8d2 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -77,6 +77,7 @@ Node *replication_parse_result;
%token K_LOGICAL
%token K_SLOT
%token K_RESERVE_WAL
+%token K_TEMPORARY
%type command
%type base_backup start_replication start_logical_replication
@@ -89,7 +90,7 @@ Node *replication_parse_result;
%type plugin_opt_elem
%type plugin_opt_arg
%type opt_slot
-%type opt_reserve_wal
+%type opt_reserve_wal opt_temporary
%%
@@ -183,24 +184,26 @@ base_backup_opt:
;
create_replication_slot:
- /* CREATE_REPLICATION_SLOT slot PHYSICAL RESERVE_WAL */
- K_CREATE_REPLICATION_SLOT IDENT K_PHYSICAL opt_reserve_wal
+ /* CREATE_REPLICATION_SLOT slot TEMPORARY PHYSICAL RESERVE_WAL */
+ K_CREATE_REPLICATION_SLOT IDENT opt_temporary K_PHYSICAL opt_reserve_wal
{
CreateReplicationSlotCmd *cmd;
cmd = makeNode(CreateReplicationSlotCmd);
cmd->kind = REPLICATION_KIND_PHYSICAL;
cmd->slotname = $2;
- cmd->reserve_wal = $4;
+ cmd->temporary = $3;
+ cmd->reserve_wal = $5;
$$ = (Node *) cmd;
}
- /* CREATE_REPLICATION_SLOT slot LOGICAL plugin */
- | K_CREATE_REPLICATION_SLOT IDENT K_LOGICAL IDENT
+ /* CREATE_REPLICATION_SLOT slot TEMPORARY LOGICAL plugin */
+ | K_CREATE_REPLICATION_SLOT IDENT opt_temporary K_LOGICAL IDENT
{
CreateReplicationSlotCmd *cmd;
cmd = makeNode(CreateReplicationSlotCmd);
cmd->kind = REPLICATION_KIND_LOGICAL;
cmd->slotname = $2;
- cmd->plugin = $4;
+ cmd->temporary = $3;
+ cmd->plugin = $5;
$$ = (Node *) cmd;
}
;
@@ -276,6 +279,11 @@ opt_reserve_wal:
| /* EMPTY */ { $$ = false; }
;
+opt_temporary:
+ K_TEMPORARY { $$ = true; }
+ | /* EMPTY */ { $$ = false; }
+ ;
+
opt_slot:
K_SLOT IDENT
{ $$ = $2; }
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index f83ec538b6..9f50ce64a5 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -98,6 +98,7 @@ PHYSICAL { return K_PHYSICAL; }
RESERVE_WAL { return K_RESERVE_WAL; }
LOGICAL { return K_LOGICAL; }
SLOT { return K_SLOT; }
+TEMPORARY { return K_TEMPORARY; }
"," { return ','; }
";" { return ';'; }
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 0b2575ee9d..d8ed005e7e 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -47,6 +47,7 @@
#include "storage/fd.h"
#include "storage/proc.h"
#include "storage/procarray.h"
+#include "utils/builtins.h"
/*
* Replication slot on-disk data structure.
@@ -98,7 +99,9 @@ int max_replication_slots = 0; /* the maximum number of replication
* slots */
static LWLockTranche ReplSlotIOLWLockTranche;
+
static void ReplicationSlotDropAcquired(void);
+static void ReplicationSlotDropPtr(ReplicationSlot *slot);
/* internal persistency functions */
static void RestoreSlotFromDisk(const char *name);
@@ -329,7 +332,7 @@ ReplicationSlotAcquire(const char *name)
{
ReplicationSlot *slot = NULL;
int i;
- int active_pid = 0;
+ int active_pid = 0; /* Keep compiler quiet */
Assert(MyReplicationSlot == NULL);
@@ -346,7 +349,7 @@ ReplicationSlotAcquire(const char *name)
SpinLockAcquire(&s->mutex);
active_pid = s->active_pid;
if (active_pid == 0)
- s->active_pid = MyProcPid;
+ active_pid = s->active_pid = MyProcPid;
SpinLockRelease(&s->mutex);
slot = s;
break;
@@ -359,7 +362,7 @@ ReplicationSlotAcquire(const char *name)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("replication slot \"%s\" does not exist", name)));
- if (active_pid != 0)
+ if (active_pid != MyProcPid)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("replication slot \"%s\" is active for PID %d",
@@ -389,9 +392,12 @@ ReplicationSlotRelease(void)
*/
ReplicationSlotDropAcquired();
}
- else
+ else if (slot->data.persistency == RS_PERSISTENT)
{
- /* Mark slot inactive. We're not freeing it, just disconnecting. */
+ /*
+ * Mark persistent slot inactive. We're not freeing it, just
+ * disconnecting.
+ */
SpinLockAcquire(&slot->mutex);
slot->active_pid = 0;
SpinLockRelease(&slot->mutex);
@@ -405,6 +411,33 @@ ReplicationSlotRelease(void)
LWLockRelease(ProcArrayLock);
}
+/*
+ * Clean up all temporary slots created in the current session.
+ */
+void
+ReplicationSlotCleanup()
+{
+ int i;
+
+ Assert(MyReplicationSlot == NULL);
+
+ /*
+ * No need for locking as we are only interested in slots active in
+ * the current process and those are not touched by other processes.
+ */
+ for (i = 0; i < max_replication_slots; i++)
+ {
+ ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
+
+ if (s->active_pid == MyProcPid)
+ {
+ Assert(s->in_use && s->data.persistency == RS_TEMPORARY);
+
+ ReplicationSlotDropPtr(s);
+ }
+ }
+}
+
/*
* Permanently drop replication slot identified by the passed in name.
*/
@@ -419,14 +452,11 @@ ReplicationSlotDrop(const char *name)
}
/*
- * Permanently drop the currently acquired replication slot which will be
- * released by the point this function returns.
+ * Permanently drop the currently acquired replication slot.
*/
static void
ReplicationSlotDropAcquired(void)
{
- char path[MAXPGPATH];
- char tmppath[MAXPGPATH];
ReplicationSlot *slot = MyReplicationSlot;
Assert(MyReplicationSlot != NULL);
@@ -434,6 +464,19 @@ ReplicationSlotDropAcquired(void)
/* slot isn't acquired anymore */
MyReplicationSlot = NULL;
+ ReplicationSlotDropPtr(slot);
+}
+
+/*
+ * Permanently drop the replication slot, which will be released by the time
+ * this function returns.
+ */
+static void
+ReplicationSlotDropPtr(ReplicationSlot *slot)
+{
+ char path[MAXPGPATH];
+ char tmppath[MAXPGPATH];
+
/*
* If some other backend ran this code concurrently with us, we might try
* to delete a slot with a certain name while someone else was trying to
@@ -448,9 +491,9 @@ ReplicationSlotDropAcquired(void)
/*
* Rename the slot directory on disk, so that we'll no longer recognize
* this as a valid slot. Note that if this fails, we've got to mark the
- * slot inactive before bailing out. If we're dropping an ephemeral slot,
- * we better never fail hard as the caller won't expect the slot to
- * survive and this might get called during error handling.
+ * slot inactive before bailing out. If we're dropping an ephemeral or
+ * a temporary slot, we better never fail hard as the caller won't expect
+ * the slot to survive and this might get called during error handling.
*/
if (rename(path, tmppath) == 0)
{
@@ -469,7 +512,7 @@ ReplicationSlotDropAcquired(void)
}
else
{
- bool fail_softly = slot->data.persistency == RS_EPHEMERAL;
+ bool fail_softly = slot->data.persistency != RS_PERSISTENT;
SpinLockAcquire(&slot->mutex);
slot->active_pid = 0;
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index f9087619d2..1f1c56cc21 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -41,6 +41,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
{
Name name = PG_GETARG_NAME(0);
bool immediately_reserve = PG_GETARG_BOOL(1);
+ bool temporary = PG_GETARG_BOOL(2);
Datum values[2];
bool nulls[2];
TupleDesc tupdesc;
@@ -57,7 +58,8 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
CheckSlotRequirements();
/* acquire replication slot, this will check for conflicting names */
- ReplicationSlotCreate(NameStr(*name), false, RS_PERSISTENT);
+ ReplicationSlotCreate(NameStr(*name), false,
+ temporary ? RS_TEMPORARY : RS_PERSISTENT);
values[0] = NameGetDatum(&MyReplicationSlot->data.name);
nulls[0] = false;
@@ -96,6 +98,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
{
Name name = PG_GETARG_NAME(0);
Name plugin = PG_GETARG_NAME(1);
+ bool temporary = PG_GETARG_BOOL(2);
LogicalDecodingContext *ctx = NULL;
@@ -116,11 +119,14 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
/*
* Acquire a logical decoding slot, this will check for conflicting names.
- * Initially create it as ephemeral - that allows us to nicely handle
- * errors during initialization because it'll get dropped if this
+ * Initially create a persistent slot as ephemeral - that allows us to nicely
+ * handle errors during initialization because it'll get dropped if this
* transaction fails. We'll make it persistent at the end.
+ * Temporary slots can be created as temporary from the beginning as they get
+ * dropped on error as well.
*/
- ReplicationSlotCreate(NameStr(*name), true, RS_EPHEMERAL);
+ ReplicationSlotCreate(NameStr(*name), true,
+ temporary ? RS_TEMPORARY : RS_EPHEMERAL);
/*
* Create logical decoding context, to build the initial snapshot.
@@ -143,8 +149,9 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
tuple = heap_form_tuple(tupdesc, values, nulls);
result = HeapTupleGetDatum(tuple);
- /* ok, slot is now fully created, mark it as persistent */
- ReplicationSlotPersist();
+ /* ok, slot is now fully created, mark it as persistent if needed */
+ if (!temporary)
+ ReplicationSlotPersist();
ReplicationSlotRelease();
PG_RETURN_DATUM(result);
@@ -174,7 +181,7 @@ pg_drop_replication_slot(PG_FUNCTION_ARGS)
Datum
pg_get_replication_slots(PG_FUNCTION_ARGS)
{
-#define PG_GET_REPLICATION_SLOTS_COLS 10
+#define PG_GET_REPLICATION_SLOTS_COLS 11
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
@@ -219,6 +226,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
Datum values[PG_GET_REPLICATION_SLOTS_COLS];
bool nulls[PG_GET_REPLICATION_SLOTS_COLS];
+ ReplicationSlotPersistency persistency;
TransactionId xmin;
TransactionId catalog_xmin;
XLogRecPtr restart_lsn;
@@ -246,6 +254,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
namecpy(&plugin, &slot->data.plugin);
active_pid = slot->active_pid;
+ persistency = slot->data.persistency;
}
SpinLockRelease(&slot->mutex);
@@ -269,6 +278,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS)
else
values[i++] = database;
+ values[i++] = BoolGetDatum(persistency == RS_TEMPORARY);
values[i++] = BoolGetDatum(active_pid != 0);
if (active_pid != 0)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index aa42d59610..b14d82153a 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -266,6 +266,8 @@ WalSndErrorCleanup(void)
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
+ ReplicationSlotCleanup();
+
replication_active = false;
if (walsender_ready_to_stop)
proc_exit(0);
@@ -796,18 +798,22 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
if (cmd->kind == REPLICATION_KIND_PHYSICAL)
{
- ReplicationSlotCreate(cmd->slotname, false, RS_PERSISTENT);
+ ReplicationSlotCreate(cmd->slotname, false,
+ cmd->temporary ? RS_TEMPORARY : RS_PERSISTENT);
}
else
{
CheckLogicalDecodingRequirements();
/*
- * Initially create the slot as ephemeral - that allows us to nicely
- * handle errors during initialization because it'll get dropped if
- * this transaction fails. We'll make it persistent at the end.
+ * Initially create a persistent slot as ephemeral - that allows us to
+ * nicely handle errors during initialization because it'll get
+ * dropped if this transaction fails. We'll make it persistent at the
+ * end. Temporary slots can be created as temporary from the beginning as
+ * they get dropped on error as well.
*/
- ReplicationSlotCreate(cmd->slotname, true, RS_EPHEMERAL);
+ ReplicationSlotCreate(cmd->slotname, true,
+ cmd->temporary ? RS_TEMPORARY : RS_EPHEMERAL);
}
initStringInfo(&output_message);
@@ -841,15 +847,18 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
/* don't need the decoding context anymore */
FreeDecodingContext(ctx);
- ReplicationSlotPersist();
+ if (!cmd->temporary)
+ ReplicationSlotPersist();
}
else if (cmd->kind == REPLICATION_KIND_PHYSICAL && cmd->reserve_wal)
{
ReplicationSlotReserveWal();
- /* Write this slot to disk */
ReplicationSlotMarkDirty();
- ReplicationSlotSave();
+
+ /* Write this slot to disk if it's a permanent one. */
+ if (!cmd->temporary)
+ ReplicationSlotSave();
}
snprintf(xpos, sizeof(xpos), "%X/%X",
@@ -933,9 +942,6 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
pq_endmessage(&buf);
- /*
- * release active status again, START_REPLICATION will reacquire it
- */
ReplicationSlotRelease();
}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 83e9ca15d1..276261bd7b 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -810,6 +810,9 @@ ProcKill(int code, Datum arg)
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
+ /* Also clean up all the temporary slots. */
+ ReplicationSlotCleanup();
+
/*
* Detach from any lock group of which we are a member. If the leader
 * exits before all other group members, its PGPROC will remain allocated
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index cc847548a9..b17923106a 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3878,6 +3878,9 @@ PostgresMain(int argc, char *argv[],
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
+ /* We also want to clean up temporary slots on error. */
+ ReplicationSlotCleanup();
+
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 96e77ec437..cd7b909812 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5178,13 +5178,13 @@ DATA(insert OID = 5016 ( spg_box_quad_leaf_consistent PGNSP PGUID 12 1 0 0 0 f
DESCR("SP-GiST support for quad tree over box");
/* replication slots */
-DATA(insert OID = 3779 ( pg_create_physical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 2 0 2249 "19 16" "{19,16,19,3220}" "{i,i,o,o}" "{slot_name,immediately_reserve,slot_name,xlog_position}" _null_ _null_ pg_create_physical_replication_slot _null_ _null_ _null_ ));
+DATA(insert OID = 3779 ( pg_create_physical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 3 0 2249 "19 16 16" "{19,16,16,19,3220}" "{i,i,i,o,o}" "{slot_name,immediately_reserve,temporary,slot_name,xlog_position}" _null_ _null_ pg_create_physical_replication_slot _null_ _null_ _null_ ));
DESCR("create a physical replication slot");
DATA(insert OID = 3780 ( pg_drop_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 1 0 2278 "19" _null_ _null_ _null_ _null_ _null_ pg_drop_replication_slot _null_ _null_ _null_ ));
DESCR("drop a replication slot");
-DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s s 0 0 2249 "" "{19,19,25,26,16,23,28,28,3220,3220}" "{o,o,o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn}" _null_ _null_ pg_get_replication_slots _null_ _null_ _null_ ));
+DATA(insert OID = 3781 ( pg_get_replication_slots PGNSP PGUID 12 1 10 0 0 f f f f f t s s 0 0 2249 "" "{19,19,25,26,16,16,23,28,28,3220,3220}" "{o,o,o,o,o,o,o,o,o,o,o}" "{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn}" _null_ _null_ pg_get_replication_slots _null_ _null_ _null_ ));
DESCR("information about replication slots currently in use");
-DATA(insert OID = 3786 ( pg_create_logical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 2 0 2249 "19 19" "{19,19,25,3220}" "{i,i,o,o}" "{slot_name,plugin,slot_name,xlog_position}" _null_ _null_ pg_create_logical_replication_slot _null_ _null_ _null_ ));
+DATA(insert OID = 3786 ( pg_create_logical_replication_slot PGNSP PGUID 12 1 0 0 0 f f f f t f v u 3 0 2249 "19 19 16" "{19,19,16,25,3220}" "{i,i,i,o,o}" "{slot_name,plugin,temporary,slot_name,xlog_position}" _null_ _null_ pg_create_logical_replication_slot _null_ _null_ _null_ ));
DESCR("set up a logical replication slot");
DATA(insert OID = 3782 ( pg_logical_slot_get_changes PGNSP PGUID 12 1000 1000 25 0 f f f f f t v u 4 0 2249 "19 3220 23 1009" "{19,3220,23,1009,3220,28,25}" "{i,i,i,v,o,o,o}" "{slot_name,upto_lsn,upto_nchanges,options,location,xid,data}" _null_ _null_ pg_logical_slot_get_changes _null_ _null_ _null_ ));
DESCR("get changes from replication slot");
diff --git a/src/include/nodes/replnodes.h b/src/include/nodes/replnodes.h
index d2f1edbf0d..024b965a24 100644
--- a/src/include/nodes/replnodes.h
+++ b/src/include/nodes/replnodes.h
@@ -55,6 +55,7 @@ typedef struct CreateReplicationSlotCmd
char *slotname;
ReplicationKind kind;
char *plugin;
+ bool temporary;
bool reserve_wal;
} CreateReplicationSlotCmd;
diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h
index e00562d274..b653e5c196 100644
--- a/src/include/replication/slot.h
+++ b/src/include/replication/slot.h
@@ -28,7 +28,8 @@
typedef enum ReplicationSlotPersistency
{
RS_PERSISTENT,
- RS_EPHEMERAL
+ RS_EPHEMERAL,
+ RS_TEMPORARY
} ReplicationSlotPersistency;
/*
@@ -165,6 +166,7 @@ extern void ReplicationSlotDrop(const char *name);
extern void ReplicationSlotAcquire(const char *name);
extern void ReplicationSlotRelease(void);
+extern void ReplicationSlotCleanup(void);
extern void ReplicationSlotSave(void);
extern void ReplicationSlotMarkDirty(void);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a8f35a76fa..5314b9c207 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1427,13 +1427,14 @@ pg_replication_slots| SELECT l.slot_name,
l.slot_type,
l.datoid,
d.datname AS database,
+ l.temporary,
l.active,
l.active_pid,
l.xmin,
l.catalog_xmin,
l.restart_lsn,
l.confirmed_flush_lsn
- FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn)
+ FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn)
LEFT JOIN pg_database d ON ((l.datoid = d.oid)));
pg_roles| SELECT pg_authid.rolname,
pg_authid.rolsuper,
--
cgit v1.2.3
From b4630e01fd4c73c195025b7307ebc13d489b9ef9 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 12 Dec 2016 13:43:48 -0500
Subject: doc: Fix purported type of pg_am.amhandler to match reality.
Joel Jacobson
---
doc/src/sgml/catalogs.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 9d2e89523d..11c2019106 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -571,7 +571,7 @@
amhandler
- oid
+ regprocpg_proc.oid
OID of a handler function that is responsible for supplying information
--
cgit v1.2.3
From a1a4459c299a86f909c27e391a10d7b9b05ea697 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 13 Dec 2016 08:18:00 -0500
Subject: doc: Improve documentation related to table partitioning feature.
Commit f0e44751d7175fa3394da2c8f85e3ceb3cdbfe63 implemented table
partitioning, but failed to mention the "no row movement"
restriction in the documentation. Fix that and a few other issues.
Amit Langote, with some additional wordsmithing by me.
---
doc/src/sgml/ref/alter_table.sgml | 4 ++--
doc/src/sgml/ref/create_table.sgml | 25 +++++++++++++------------
doc/src/sgml/ref/insert.sgml | 11 +++++++++++
doc/src/sgml/ref/update.sgml | 8 ++++++++
4 files changed, 34 insertions(+), 14 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index a6a43c4b30..333b01db36 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -715,7 +715,7 @@ ALTER TABLE [ IF EXISTS ] name
- ATTACH PARTITIONpartition_namepartition_bound_spec
+ ATTACH PARTITIONpartition_name FOR VALUES partition_bound_spec
This form attaches an existing table (which might itself be partitioned)
@@ -1332,7 +1332,7 @@ ALTER TABLE measurement
Attach a partition to list partitioned table:
ALTER TABLE cities
- ATTACH PARTITION cities_west FOR VALUES IN ('Los Angeles', 'San Francisco');
+ ATTACH PARTITION cities_ab FOR VALUES IN ('a', 'b');
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 8bf8af302b..58f8bf6d6a 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -248,7 +248,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
- PARTITION OF parent_table
+ PARTITION OF parent_table FOR VALUES partition_bound_spec
Creates the table as partition of the specified
@@ -275,7 +275,8 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
Rows inserted into a partitioned table will be automatically routed to
the correct partition. If no suitable partition exists, an error will
- occur.
+ occur. Also, if updating a row in a given partition causes it to move
+ to another partition due to the new partition key, an error will occur.
@@ -1477,7 +1478,6 @@ CREATE TABLE employees OF employee_type (
Create a range partitioned table:
CREATE TABLE measurement (
- city_id int not null,
logdate date not null,
peaktemp int,
unitsales int
@@ -1488,9 +1488,10 @@ CREATE TABLE measurement (
Create a list partitioned table:
CREATE TABLE cities (
+ city_id bigserial not null,
name text not null,
- population int,
-) PARTITION BY LIST (initcap(name));
+ population bigint,
+) PARTITION BY LIST (left(lower(name), 1));
@@ -1498,30 +1499,30 @@ CREATE TABLE cities (
CREATE TABLE measurement_y2016m07
PARTITION OF measurement (
- unitsales WITH OPTIONS DEFAULT 0
+ unitsales DEFAULT 0
) FOR VALUES FROM ('2016-07-01') TO ('2016-08-01');
Create partition of a list partitioned table:
-CREATE TABLE cities_west
+CREATE TABLE cities_ab
PARTITION OF cities (
CONSTRAINT city_id_nonzero CHECK (city_id != 0)
-) FOR VALUES IN ('Los Angeles', 'San Francisco');
+) FOR VALUES IN ('a', 'b');
Create partition of a list partitioned table that is itself further
partitioned and then add a partition to it:
-CREATE TABLE cities_west
+CREATE TABLE cities_ab
PARTITION OF cities (
CONSTRAINT city_id_nonzero CHECK (city_id != 0)
-) FOR VALUES IN ('Los Angeles', 'San Francisco') PARTITION BY RANGE (population);
+) FOR VALUES IN ('a', 'b') PARTITION BY RANGE (population);
-CREATE TABLE cities_west_10000_to_100000
- PARTITION OF cities_west FOR VALUES FROM (10000) TO (100000);
+CREATE TABLE cities_ab_10000_to_100000
+ PARTITION OF cities_ab FOR VALUES FROM (10000) TO (100000);
diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml
index 06f416039b..00c984d8d5 100644
--- a/doc/src/sgml/ref/insert.sgml
+++ b/doc/src/sgml/ref/insert.sgml
@@ -526,6 +526,17 @@ INSERT oidcount
+
+
+ Notes
+
+
+ If the specified table is a partitioned table, each row is routed to
+ the appropriate partition and inserted into it. If the specified table
+ is a partition, an error will occur if one of the input rows violates
+ the partition constraint.
+
+ Examples
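To make the new note concrete, here is an illustrative sketch (not part of the
patch) that assumes the measurement table and its measurement_y2016m07
partition from the create_table.sgml examples above. Inserting a row directly
into a partition fails when the row falls outside the partition's bounds, with
an error along the lines of:

INSERT INTO measurement_y2016m07 VALUES ('2016-09-01', 35, 10);
ERROR:  new row for relation "measurement_y2016m07" violates partition constraint

Inserting the same row through the parent table instead routes it to the
appropriate partition, if one exists.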
diff --git a/doc/src/sgml/ref/update.sgml b/doc/src/sgml/ref/update.sgml
index 2de0f4aad1..8a1619fb68 100644
--- a/doc/src/sgml/ref/update.sgml
+++ b/doc/src/sgml/ref/update.sgml
@@ -279,6 +279,14 @@ UPDATE count
sub-selects is safer, though often harder to read and slower than
using a join.
+
+
+ In the case of a partitioned table, updating a row might cause it to no
+ longer satisfy the partition constraint. Since there is no provision to
+ move the row to the partition appropriate to the new value of its
+ partitioning key, an error will occur in this case. This can also happen
+ when updating a partition directly.
+
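Continuing the same hypothetical setup as the INSERT sketch above, an UPDATE
that would require moving a row to a different partition is rejected with an
error along the lines of:

UPDATE measurement SET logdate = '2016-09-15' WHERE logdate = '2016-07-20';
ERROR:  new row for relation "measurement_y2016m07" violates partition constraint

The new key value places the row in another partition, and since there is no
row movement, the command errors out instead.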
--
cgit v1.2.3
From 4e344c2cf4ff00ca38ea0035bc137dab95fdd0c0 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Sat, 17 Dec 2016 01:25:29 +0900
Subject: Add missing documentation for effective_io_concurrency tablespace
option.
The description of the effective_io_concurrency option was missing from the
ALTER TABLESPACE docs, though it's included in the CREATE TABLESPACE page.
Back-patch to 9.6, where the effective_io_concurrency tablespace option was added.
Michael Paquier, reported by Marc-Olaf Jaschke
---
doc/src/sgml/ref/alter_tablespace.sgml | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_tablespace.sgml b/doc/src/sgml/ref/alter_tablespace.sgml
index d9b2a133b1..2f41105001 100644
--- a/doc/src/sgml/ref/alter_tablespace.sgml
+++ b/doc/src/sgml/ref/alter_tablespace.sgml
@@ -83,14 +83,15 @@ ALTER TABLESPACE name RESET ( ,
- ). This may be useful if one
- tablespace is located on a disk which is faster or slower than the
+ available parameters are seq_page_cost>,
+ random_page_cost> and effective_io_concurrency>.
+ Setting either value for a particular tablespace will override the
+ planner's usual estimate of the cost of reading pages from tables in
+ that tablespace, as established by the configuration parameters of the
+ same name (see ,
+ ,
+ ). This may be useful if
+ one tablespace is located on a disk which is faster or slower than the
remainder of the I/O subsystem.
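For example (an illustrative sketch; the tablespace name fast_ssd is
hypothetical), the newly documented parameter can be set and reset per
tablespace just like the cost parameters:

ALTER TABLESPACE fast_ssd SET (effective_io_concurrency = 32);
ALTER TABLESPACE fast_ssd RESET (effective_io_concurrency);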
--
cgit v1.2.3
From 3761fe3c20bb040b15f0e8da58d824631da00caa Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Fri, 16 Dec 2016 11:29:23 -0500
Subject: Simplify LWLock tranche machinery by removing
array_base/array_stride.
array_base and array_stride were added so that we could identify the
offset of an LWLock within a tranche, but this facility is only very
marginally used apart from the main tranche. So, give every lock in
the main tranche its own tranche ID and get rid of array_base,
array_stride, and all that's attached. For debugging facilities
(Trace_lwlocks and LWLOCK_STATS) print the pointer address of the
LWLock using %p instead of the offset. This is arguably more useful,
and certainly a lot cheaper. Drop the offset-within-tranche from
the information reported to dtrace and from one can't-happen message
inside lwlock.c.
The main user-visible impact of this change is that pg_stat_activity
will now report all waits for LWLocks as "LWLock" rather than
reporting some as "LWLockTranche" and others as "LWLockNamed".
The main motivation for this change is that the need to specify an
array_base and an array_stride is awkward for parallel query. There
is only a very limited supply of tranche IDs so we can't just keep
allocating new ones, and if we try to use the same tranche IDs every
time then we run into trouble when multiple parallel contexts are
used simultaneously. So if we didn't get rid of this mechanism we'd
have to make it even more complicated. By simplifying it in this
way, we instead reduce the size of the generated code for lwlock.c
by about 5%.
Discussion: http://postgr.es/m/CA+TgmoYsFn6NUW1x0AZtupJGUAs1UDY4dJtCN47_Q6D0sP80PA@mail.gmail.com
---
doc/src/sgml/monitoring.sgml | 52 ++++-----
src/backend/access/transam/slru.c | 6 +-
src/backend/access/transam/xlog.c | 9 +-
src/backend/postmaster/pgstat.c | 10 +-
src/backend/replication/logical/origin.c | 8 +-
src/backend/replication/slot.c | 8 +-
src/backend/storage/buffer/buf_init.c | 16 +--
src/backend/storage/ipc/procarray.c | 9 +-
src/backend/storage/lmgr/lwlock.c | 175 ++++++++++---------------------
src/backend/utils/mmgr/dsa.c | 15 +--
src/backend/utils/probes.d | 16 +--
src/include/access/slru.h | 1 -
src/include/pgstat.h | 3 +-
src/include/storage/lwlock.h | 45 ++------
14 files changed, 112 insertions(+), 261 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 128ee13b5f..5b58d2e84d 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -646,18 +646,11 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
- LWLockNamed>: The backend is waiting for a specific named
- lightweight lock. Each such lock protects a particular data
- structure in shared memory. wait_event> will contain
- the name of the lightweight lock.
-
-
-
-
- LWLockTranche>: The backend is waiting for one of a
- group of related lightweight locks. All locks in the group perform
- a similar function; wait_event> will identify the general
- purpose of locks in that group.
+ LWLock>: The backend is waiting for a lightweight lock.
+ Each such lock protects a particular data structure in shared memory.
+ wait_event> will contain a name identifying the purpose
+ of the lightweight lock. (Some locks have specific names; others
+ are part of a group of locks each with a similar purpose.)
@@ -825,7 +818,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
- LWLockNamed>
+ LWLock>ShmemIndexLock>Waiting to find or allocate space in shared memory.
@@ -1011,7 +1004,6 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
Waiting to read or update old snapshot control information.
- LWLockTranche>clog>Waiting for I/O on a clog (transaction status) buffer.
@@ -1279,7 +1271,7 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
pid | wait_event_type | wait_event
------+-----------------+---------------
2540 | Lock | relation
- 6644 | LWLockNamed | ProcArrayLock
+ 6644 | LWLock | ProcArrayLock
(2 rows)
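Since all lightweight-lock waits now share a single wait_event_type, a
monitoring query can aggregate them directly; a sketch using only the
pg_stat_activity columns shown above:

SELECT wait_event, count(*)
  FROM pg_stat_activity
 WHERE wait_event_type = 'LWLock'
 GROUP BY wait_event
 ORDER BY count(*) DESC;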
@@ -3347,55 +3339,49 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
lwlock-acquire
- (char *, int, LWLockMode)
+ (char *, LWLockMode)Probe that fires when an LWLock has been acquired.
arg0 is the LWLock's tranche.
- arg1 is the LWLock's offset within its tranche.
- arg2 is the requested lock mode, either exclusive or shared.
+ arg1 is the requested lock mode, either exclusive or shared.
lwlock-release
- (char *, int)
+ (char *)Probe that fires when an LWLock has been released (but note
that any released waiters have not yet been awakened).
- arg0 is the LWLock's tranche.
- arg1 is the LWLock's offset within its tranche.
+ arg0 is the LWLock's tranche.
lwlock-wait-start
- (char *, int, LWLockMode)
+ (char *, LWLockMode)Probe that fires when an LWLock was not immediately available and
a server process has begun to wait for the lock to become available.
arg0 is the LWLock's tranche.
- arg1 is the LWLock's offset within its tranche.
- arg2 is the requested lock mode, either exclusive or shared.
+ arg1 is the requested lock mode, either exclusive or shared.
lwlock-wait-done
- (char *, int, LWLockMode)
+ (char *, LWLockMode)Probe that fires when a server process has been released from its
wait for an LWLock (it does not actually have the lock yet).
arg0 is the LWLock's tranche.
- arg1 is the LWLock's offset within its tranche.
- arg2 is the requested lock mode, either exclusive or shared.
+ arg1 is the requested lock mode, either exclusive or shared.
lwlock-condacquire
- (char *, int, LWLockMode)
+ (char *, LWLockMode)Probe that fires when an LWLock was successfully acquired when the
caller specified no waiting.
arg0 is the LWLock's tranche.
- arg1 is the LWLock's offset within its tranche.
- arg2 is the requested lock mode, either exclusive or shared.
+ arg1 is the requested lock mode, either exclusive or shared.
lwlock-condacquire-fail
- (char *, int, LWLockMode)
+ (char *, LWLockMode)Probe that fires when an LWLock was not successfully acquired when
the caller specified no waiting.
arg0 is the LWLock's tranche.
- arg1 is the LWLock's offset within its tranche.
- arg2 is the requested lock mode, either exclusive or shared.
+ arg1 is the requested lock mode, either exclusive or shared.
lock-wait-start
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index bbae5847f2..8b95f35dba 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -216,9 +216,6 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
Assert(strlen(name) + 1 < SLRU_MAX_NAME_LENGTH);
strlcpy(shared->lwlock_tranche_name, name, SLRU_MAX_NAME_LENGTH);
shared->lwlock_tranche_id = tranche_id;
- shared->lwlock_tranche.name = shared->lwlock_tranche_name;
- shared->lwlock_tranche.array_base = shared->buffer_locks;
- shared->lwlock_tranche.array_stride = sizeof(LWLockPadded);
ptr += BUFFERALIGN(offset);
for (slotno = 0; slotno < nslots; slotno++)
@@ -237,7 +234,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
Assert(found);
/* Register SLRU tranche in the main tranches array */
- LWLockRegisterTranche(shared->lwlock_tranche_id, &shared->lwlock_tranche);
+ LWLockRegisterTranche(shared->lwlock_tranche_id,
+ shared->lwlock_tranche_name);
/*
* Initialize the unshared control struct, including directory path. We
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 084401d2f2..aa9ee5a0dd 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -517,7 +517,6 @@ typedef struct XLogCtlInsert
* WAL insertion locks.
*/
WALInsertLockPadded *WALInsertLocks;
- LWLockTranche WALInsertLockTranche;
} XLogCtlInsert;
/*
@@ -4688,7 +4687,7 @@ XLOGShmemInit(void)
/* Initialize local copy of WALInsertLocks and register the tranche */
WALInsertLocks = XLogCtl->Insert.WALInsertLocks;
LWLockRegisterTranche(LWTRANCHE_WAL_INSERT,
- &XLogCtl->Insert.WALInsertLockTranche);
+ "wal_insert");
return;
}
memset(XLogCtl, 0, sizeof(XLogCtlData));
@@ -4711,11 +4710,7 @@ XLOGShmemInit(void)
(WALInsertLockPadded *) allocptr;
allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
- XLogCtl->Insert.WALInsertLockTranche.name = "wal_insert";
- XLogCtl->Insert.WALInsertLockTranche.array_base = WALInsertLocks;
- XLogCtl->Insert.WALInsertLockTranche.array_stride = sizeof(WALInsertLockPadded);
-
- LWLockRegisterTranche(LWTRANCHE_WAL_INSERT, &XLogCtl->Insert.WALInsertLockTranche);
+ LWLockRegisterTranche(LWTRANCHE_WAL_INSERT, "wal_insert");
for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
{
LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index c7584cb1d3..61e6a2cd33 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3152,11 +3152,8 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
switch (classId)
{
- case PG_WAIT_LWLOCK_NAMED:
- event_type = "LWLockNamed";
- break;
- case PG_WAIT_LWLOCK_TRANCHE:
- event_type = "LWLockTranche";
+ case PG_WAIT_LWLOCK:
+ event_type = "LWLock";
break;
case PG_WAIT_LOCK:
event_type = "Lock";
@@ -3209,8 +3206,7 @@ pgstat_get_wait_event(uint32 wait_event_info)
switch (classId)
{
- case PG_WAIT_LWLOCK_NAMED:
- case PG_WAIT_LWLOCK_TRANCHE:
+ case PG_WAIT_LWLOCK:
event_name = GetLWLockIdentifier(classId, eventId);
break;
case PG_WAIT_LOCK:
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index cc2b513236..0deec75dc2 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -143,7 +143,6 @@ typedef struct ReplicationStateOnDisk
typedef struct ReplicationStateCtl
{
int tranche_id;
- LWLockTranche tranche;
ReplicationState states[FLEXIBLE_ARRAY_MEMBER];
} ReplicationStateCtl;
@@ -474,11 +473,6 @@ ReplicationOriginShmemInit(void)
int i;
replication_states_ctl->tranche_id = LWTRANCHE_REPLICATION_ORIGIN;
- replication_states_ctl->tranche.name = "replication_origin";
- replication_states_ctl->tranche.array_base =
- &replication_states[0].lock;
- replication_states_ctl->tranche.array_stride =
- sizeof(ReplicationState);
MemSet(replication_states, 0, ReplicationOriginShmemSize());
@@ -488,7 +482,7 @@ ReplicationOriginShmemInit(void)
}
LWLockRegisterTranche(replication_states_ctl->tranche_id,
- &replication_states_ctl->tranche);
+ "replication_origin");
}
/* ---------------------------------------------------------------------------
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index d8ed005e7e..cf814d11a2 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -98,8 +98,6 @@ ReplicationSlot *MyReplicationSlot = NULL;
int max_replication_slots = 0; /* the maximum number of replication
* slots */
-static LWLockTranche ReplSlotIOLWLockTranche;
-
static void ReplicationSlotDropAcquired(void);
static void ReplicationSlotDropPtr(ReplicationSlot *slot);
@@ -141,12 +139,8 @@ ReplicationSlotsShmemInit(void)
ShmemInitStruct("ReplicationSlot Ctl", ReplicationSlotsShmemSize(),
&found);
- ReplSlotIOLWLockTranche.name = "replication_slot_io";
- ReplSlotIOLWLockTranche.array_base =
- ((char *) ReplicationSlotCtl) + offsetof(ReplicationSlotCtlData, replication_slots) +offsetof(ReplicationSlot, io_in_progress_lock);
- ReplSlotIOLWLockTranche.array_stride = sizeof(ReplicationSlot);
LWLockRegisterTranche(LWTRANCHE_REPLICATION_SLOT_IO_IN_PROGRESS,
- &ReplSlotIOLWLockTranche);
+ "replication_slot_io");
if (!found)
{
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index a4163cf717..c507389a05 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -21,8 +21,6 @@
BufferDescPadded *BufferDescriptors;
char *BufferBlocks;
LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
-LWLockTranche BufferIOLWLockTranche;
-LWLockTranche BufferContentLWLockTranche;
WritebackContext BackendWritebackContext;
CkptSortItem *CkptBufferIds;
@@ -90,18 +88,8 @@ InitBufferPool(void)
NBuffers * (Size) sizeof(LWLockMinimallyPadded),
&foundIOLocks);
- BufferIOLWLockTranche.name = "buffer_io";
- BufferIOLWLockTranche.array_base = BufferIOLWLockArray;
- BufferIOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
- LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS,
- &BufferIOLWLockTranche);
-
- BufferContentLWLockTranche.name = "buffer_content";
- BufferContentLWLockTranche.array_base =
- ((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
- BufferContentLWLockTranche.array_stride = sizeof(BufferDescPadded);
- LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT,
- &BufferContentLWLockTranche);
+ LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
+ LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");
/*
* The array used to sort to-be-checkpointed buffer ids is located in
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index bf38470f01..0f637556cc 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -106,9 +106,6 @@ static TransactionId *KnownAssignedXids;
static bool *KnownAssignedXidsValid;
static TransactionId latestObservedXid = InvalidTransactionId;
-/* LWLock tranche for backend locks */
-static LWLockTranche ProcLWLockTranche;
-
/*
* If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
* the highest xid that might still be running that we don't have in
@@ -266,11 +263,7 @@ CreateSharedProcArray(void)
}
/* Register and initialize fields of ProcLWLockTranche */
- ProcLWLockTranche.name = "proc";
- ProcLWLockTranche.array_base = (char *) (ProcGlobal->allProcs) +
- offsetof(PGPROC, backendLock);
- ProcLWLockTranche.array_stride = sizeof(PGPROC);
- LWLockRegisterTranche(LWTRANCHE_PROC, &ProcLWLockTranche);
+ LWLockRegisterTranche(LWTRANCHE_PROC, "proc");
}
/*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 03c4c7825e..4b381e4e0c 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -108,18 +108,14 @@ extern slock_t *ShmemLock;
#define LW_SHARED_MASK ((uint32) ((1 << 24)-1))
/*
- * This is indexed by tranche ID and stores metadata for all tranches known
+ * This is indexed by tranche ID and stores the names of all tranches known
* to the current backend.
*/
-static LWLockTranche **LWLockTrancheArray = NULL;
+static char **LWLockTrancheArray = NULL;
static int LWLockTranchesAllocated = 0;
#define T_NAME(lock) \
- (LWLockTrancheArray[(lock)->tranche]->name)
-#define T_ID(lock) \
- ((int) ((((char *) lock) - \
- ((char *) LWLockTrancheArray[(lock)->tranche]->array_base)) / \
- LWLockTrancheArray[(lock)->tranche]->array_stride))
+ (LWLockTrancheArray[(lock)->tranche])
/*
* This points to the main array of LWLocks in shared memory. Backends inherit
@@ -127,10 +123,6 @@ static int LWLockTranchesAllocated = 0;
* where we have special measures to pass it down).
*/
LWLockPadded *MainLWLockArray = NULL;
-static LWLockTranche MainLWLockTranche;
-static LWLockTranche BufMappingLWLockTranche;
-static LWLockTranche LockManagerLWLockTranche;
-static LWLockTranche PredicateLockManagerLWLockTranche;
/*
* We use this structure to keep track of locked LWLocks for release
@@ -175,7 +167,7 @@ static inline void LWLockReportWaitEnd(void);
typedef struct lwlock_stats_key
{
int tranche;
- int instance;
+ void *instance;
} lwlock_stats_key;
typedef struct lwlock_stats
@@ -202,32 +194,18 @@ PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
if (Trace_lwlocks)
{
uint32 state = pg_atomic_read_u32(&lock->state);
- int id = T_ID(lock);
-
- if (lock->tranche == 0 && id < NUM_INDIVIDUAL_LWLOCKS)
- ereport(LOG,
- (errhidestmt(true),
- errhidecontext(true),
- errmsg_internal("%d: %s(%s): excl %u shared %u haswaiters %u waiters %u rOK %d",
- MyProcPid,
- where, MainLWLockNames[id],
- (state & LW_VAL_EXCLUSIVE) != 0,
- state & LW_SHARED_MASK,
- (state & LW_FLAG_HAS_WAITERS) != 0,
- pg_atomic_read_u32(&lock->nwaiters),
- (state & LW_FLAG_RELEASE_OK) != 0)));
- else
- ereport(LOG,
- (errhidestmt(true),
- errhidecontext(true),
- errmsg_internal("%d: %s(%s %d): excl %u shared %u haswaiters %u waiters %u rOK %d",
- MyProcPid,
- where, T_NAME(lock), id,
- (state & LW_VAL_EXCLUSIVE) != 0,
- state & LW_SHARED_MASK,
- (state & LW_FLAG_HAS_WAITERS) != 0,
- pg_atomic_read_u32(&lock->nwaiters),
- (state & LW_FLAG_RELEASE_OK) != 0)));
+
+ ereport(LOG,
+ (errhidestmt(true),
+ errhidecontext(true),
+ errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
+ MyProcPid,
+ where, T_NAME(lock), lock,
+ (state & LW_VAL_EXCLUSIVE) != 0,
+ state & LW_SHARED_MASK,
+ (state & LW_FLAG_HAS_WAITERS) != 0,
+ pg_atomic_read_u32(&lock->nwaiters),
+ (state & LW_FLAG_RELEASE_OK) != 0)));
}
}
@@ -237,20 +215,11 @@ LOG_LWDEBUG(const char *where, LWLock *lock, const char *msg)
/* hide statement & context here, otherwise the log is just too verbose */
if (Trace_lwlocks)
{
- int id = T_ID(lock);
-
- if (lock->tranche == 0 && id < NUM_INDIVIDUAL_LWLOCKS)
- ereport(LOG,
- (errhidestmt(true),
- errhidecontext(true),
- errmsg_internal("%s(%s): %s", where,
- MainLWLockNames[id], msg)));
- else
- ereport(LOG,
- (errhidestmt(true),
- errhidecontext(true),
- errmsg_internal("%s(%s %d): %s", where,
- T_NAME(lock), id, msg)));
+ ereport(LOG,
+ (errhidestmt(true),
+ errhidecontext(true),
+ errmsg_internal("%s(%s %p): %s", where,
+ T_NAME(lock), lock, msg)));
}
}
@@ -315,8 +284,8 @@ print_lwlock_stats(int code, Datum arg)
while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
{
fprintf(stderr,
- "PID %d lwlock %s %d: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
- MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
+ "PID %d lwlock %s %p: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
+ MyProcPid, LWLockTrancheArray[lwstats->key.tranche],
lwstats->key.instance, lwstats->sh_acquire_count,
lwstats->ex_acquire_count, lwstats->block_count,
lwstats->spin_delay_count, lwstats->dequeue_self_count);
@@ -342,7 +311,7 @@ get_lwlock_stats_entry(LWLock *lock)
/* Fetch or create the entry. */
key.tranche = lock->tranche;
- key.instance = T_ID(lock);
+ key.instance = lock;
lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
if (!found)
{
@@ -464,7 +433,7 @@ InitializeLWLocks(void)
/* Initialize all individual LWLocks in main array */
for (id = 0, lock = MainLWLockArray; id < NUM_INDIVIDUAL_LWLOCKS; id++, lock++)
- LWLockInitialize(&lock->lock, LWTRANCHE_MAIN);
+ LWLockInitialize(&lock->lock, id);
/* Initialize buffer mapping LWLocks in main array */
lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS;
@@ -506,10 +475,8 @@ InitializeLWLocks(void)
name = trancheNames;
trancheNames += strlen(request->tranche_name) + 1;
strcpy(name, request->tranche_name);
- tranche->lwLockTranche.name = name;
tranche->trancheId = LWLockNewTrancheId();
- tranche->lwLockTranche.array_base = lock;
- tranche->lwLockTranche.array_stride = sizeof(LWLockPadded);
+ tranche->trancheName = name;
for (j = 0; j < request->num_lwlocks; j++, lock++)
LWLockInitialize(&lock->lock, tranche->trancheId);
@@ -527,39 +494,25 @@ RegisterLWLockTranches(void)
if (LWLockTrancheArray == NULL)
{
- LWLockTranchesAllocated = 32;
- LWLockTrancheArray = (LWLockTranche **)
+ LWLockTranchesAllocated = 64;
+ LWLockTrancheArray = (char **)
MemoryContextAllocZero(TopMemoryContext,
- LWLockTranchesAllocated * sizeof(LWLockTranche *));
+ LWLockTranchesAllocated * sizeof(char *));
Assert(LWLockTranchesAllocated >= LWTRANCHE_FIRST_USER_DEFINED);
}
- MainLWLockTranche.name = "main";
- MainLWLockTranche.array_base = MainLWLockArray;
- MainLWLockTranche.array_stride = sizeof(LWLockPadded);
- LWLockRegisterTranche(LWTRANCHE_MAIN, &MainLWLockTranche);
+ for (i = 0; i < NUM_INDIVIDUAL_LWLOCKS; ++i)
+ LWLockRegisterTranche(i, MainLWLockNames[i]);
- BufMappingLWLockTranche.name = "buffer_mapping";
- BufMappingLWLockTranche.array_base = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS;
- BufMappingLWLockTranche.array_stride = sizeof(LWLockPadded);
- LWLockRegisterTranche(LWTRANCHE_BUFFER_MAPPING, &BufMappingLWLockTranche);
-
- LockManagerLWLockTranche.name = "lock_manager";
- LockManagerLWLockTranche.array_base = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS +
- NUM_BUFFER_PARTITIONS;
- LockManagerLWLockTranche.array_stride = sizeof(LWLockPadded);
- LWLockRegisterTranche(LWTRANCHE_LOCK_MANAGER, &LockManagerLWLockTranche);
-
- PredicateLockManagerLWLockTranche.name = "predicate_lock_manager";
- PredicateLockManagerLWLockTranche.array_base = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS +
- NUM_BUFFER_PARTITIONS + NUM_LOCK_PARTITIONS;
- PredicateLockManagerLWLockTranche.array_stride = sizeof(LWLockPadded);
- LWLockRegisterTranche(LWTRANCHE_PREDICATE_LOCK_MANAGER, &PredicateLockManagerLWLockTranche);
+ LWLockRegisterTranche(LWTRANCHE_BUFFER_MAPPING, "buffer_mapping");
+ LWLockRegisterTranche(LWTRANCHE_LOCK_MANAGER, "lock_manager");
+ LWLockRegisterTranche(LWTRANCHE_PREDICATE_LOCK_MANAGER,
+ "predicate_lock_manager");
/* Register named tranches. */
for (i = 0; i < NamedLWLockTrancheRequests; i++)
LWLockRegisterTranche(NamedLWLockTrancheArray[i].trancheId,
- &NamedLWLockTrancheArray[i].lwLockTranche);
+ NamedLWLockTrancheArray[i].trancheName);
}
/*
@@ -633,7 +586,7 @@ LWLockNewTrancheId(void)
* (TopMemoryContext, static variable, or similar).
*/
void
-LWLockRegisterTranche(int tranche_id, LWLockTranche *tranche)
+LWLockRegisterTranche(int tranche_id, char *tranche_name)
{
Assert(LWLockTrancheArray != NULL);
@@ -645,15 +598,14 @@ LWLockRegisterTranche(int tranche_id, LWLockTranche *tranche)
while (i <= tranche_id)
i *= 2;
- LWLockTrancheArray = (LWLockTranche **)
- repalloc(LWLockTrancheArray,
- i * sizeof(LWLockTranche *));
+ LWLockTrancheArray = (char **)
+ repalloc(LWLockTrancheArray, i * sizeof(char *));
LWLockTranchesAllocated = i;
while (j < LWLockTranchesAllocated)
LWLockTrancheArray[j++] = NULL;
}
- LWLockTrancheArray[tranche_id] = tranche;
+ LWLockTrancheArray[tranche_id] = tranche_name;
}
/*
@@ -729,12 +681,7 @@ LWLockInitialize(LWLock *lock, int tranche_id)
static inline void
LWLockReportWaitStart(LWLock *lock)
{
- int lockId = T_ID(lock);
-
- if (lock->tranche == 0)
- pgstat_report_wait_start(PG_WAIT_LWLOCK_NAMED | (uint16) lockId);
- else
- pgstat_report_wait_start(PG_WAIT_LWLOCK_TRANCHE | lock->tranche);
+ pgstat_report_wait_start(PG_WAIT_LWLOCK | lock->tranche);
}
/*
@@ -752,10 +699,7 @@ LWLockReportWaitEnd(void)
const char *
GetLWLockIdentifier(uint32 classId, uint16 eventId)
{
- if (classId == PG_WAIT_LWLOCK_NAMED)
- return MainLWLockNames[eventId];
-
- Assert(classId == PG_WAIT_LWLOCK_TRANCHE);
+ Assert(classId == PG_WAIT_LWLOCK);
/*
* It is quite possible that user has registered tranche in one of the
@@ -763,10 +707,10 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId)
* all of them, so we can't assume the tranche is registered here.
*/
if (eventId >= LWLockTranchesAllocated ||
- LWLockTrancheArray[eventId]->name == NULL)
+ LWLockTrancheArray[eventId] == NULL)
return "extension";
- return LWLockTrancheArray[eventId]->name;
+ return LWLockTrancheArray[eventId];
}
/*
@@ -1279,7 +1223,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
#endif
LWLockReportWaitStart(lock);
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
for (;;)
{
@@ -1301,7 +1245,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
}
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
LWLockReportWaitEnd();
LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
@@ -1310,7 +1254,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
result = false;
}
- TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);
/* Add lock to list of locks held by this backend */
held_lwlocks[num_held_lwlocks].lock = lock;
@@ -1361,14 +1305,14 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
RESUME_INTERRUPTS();
LOG_LWDEBUG("LWLockConditionalAcquire", lock, "failed");
- TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(lock), mode);
}
else
{
/* Add lock to list of locks held by this backend */
held_lwlocks[num_held_lwlocks].lock = lock;
held_lwlocks[num_held_lwlocks++].mode = mode;
- TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(lock), mode);
}
return !mustwait;
}
@@ -1440,7 +1384,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
#endif
LWLockReportWaitStart(lock);
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
for (;;)
{
@@ -1458,7 +1402,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Assert(nwaiters < MAX_BACKENDS);
}
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
LWLockReportWaitEnd();
LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
@@ -1488,8 +1432,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
LOG_LWDEBUG("LWLockAcquireOrWait", lock, "failed");
- TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), T_ID(lock),
- mode);
+ TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT_FAIL(T_NAME(lock), mode);
}
else
{
@@ -1497,7 +1440,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
/* Add lock to list of locks held by this backend */
held_lwlocks[num_held_lwlocks].lock = lock;
held_lwlocks[num_held_lwlocks++].mode = mode;
- TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), T_ID(lock), mode);
+ TRACE_POSTGRESQL_LWLOCK_ACQUIRE_OR_WAIT(T_NAME(lock), mode);
}
return !mustwait;
@@ -1657,8 +1600,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
#endif
LWLockReportWaitStart(lock);
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), T_ID(lock),
- LW_EXCLUSIVE);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
for (;;)
{
@@ -1677,8 +1619,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
}
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), T_ID(lock),
- LW_EXCLUSIVE);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
LWLockReportWaitEnd();
LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
@@ -1686,7 +1627,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
/* Now loop back and check the status of the lock again. */
}
- TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), LW_EXCLUSIVE);
+ TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), LW_EXCLUSIVE);
/*
* Fix the process wait semaphore's count for any absorbed wakeups.
@@ -1784,7 +1725,7 @@ LWLockRelease(LWLock *lock)
break;
if (i < 0)
- elog(ERROR, "lock %s %d is not held", T_NAME(lock), T_ID(lock));
+ elog(ERROR, "lock %s is not held", T_NAME(lock));
mode = held_lwlocks[i].mode;
@@ -1829,7 +1770,7 @@ LWLockRelease(LWLock *lock)
LWLockWakeup(lock);
}
- TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock), T_ID(lock));
+ TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));
/*
* Now okay to allow cancel/die interrupts.
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index abe3f1a63d..988a2970b2 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -362,9 +362,6 @@ struct dsa_area
/* Pointer to the control object in shared memory. */
dsa_area_control *control;
- /* The lock tranche for this process. */
- LWLockTranche lwlock_tranche;
-
/* Has the mapping been pinned? */
bool mapping_pinned;
@@ -1207,10 +1204,8 @@ create_internal(void *place, size_t size,
area->mapping_pinned = false;
memset(area->segment_maps, 0, sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
area->high_segment_index = 0;
- area->lwlock_tranche.array_base = &area->control->pools[0];
- area->lwlock_tranche.array_stride = sizeof(dsa_area_pool);
- area->lwlock_tranche.name = control->lwlock_tranche_name;
- LWLockRegisterTranche(control->lwlock_tranche_id, &area->lwlock_tranche);
+ LWLockRegisterTranche(control->lwlock_tranche_id,
+ control->lwlock_tranche_name);
LWLockInitialize(&control->lock, control->lwlock_tranche_id);
for (i = 0; i < DSA_NUM_SIZE_CLASSES; ++i)
LWLockInitialize(DSA_SCLASS_LOCK(area, i),
@@ -1267,10 +1262,8 @@ attach_internal(void *place, dsm_segment *segment, dsa_handle handle)
memset(&area->segment_maps[0], 0,
sizeof(dsa_segment_map) * DSA_MAX_SEGMENTS);
area->high_segment_index = 0;
- area->lwlock_tranche.array_base = &area->control->pools[0];
- area->lwlock_tranche.array_stride = sizeof(dsa_area_pool);
- area->lwlock_tranche.name = control->lwlock_tranche_name;
- LWLockRegisterTranche(control->lwlock_tranche_id, &area->lwlock_tranche);
+ LWLockRegisterTranche(control->lwlock_tranche_id,
+ control->lwlock_tranche_name);
/* Set up the segment map for this process's mapping. */
segment_map = &area->segment_maps[0];
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index 2f92dfa9ad..adcebe2e05 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -28,14 +28,14 @@ provider postgresql {
probe transaction__commit(LocalTransactionId);
probe transaction__abort(LocalTransactionId);
- probe lwlock__acquire(const char *, int, LWLockMode);
- probe lwlock__release(const char *, int);
- probe lwlock__wait__start(const char *, int, LWLockMode);
- probe lwlock__wait__done(const char *, int, LWLockMode);
- probe lwlock__condacquire(const char *, int, LWLockMode);
- probe lwlock__condacquire__fail(const char *, int, LWLockMode);
- probe lwlock__acquire__or__wait(const char *, int, LWLockMode);
- probe lwlock__acquire__or__wait__fail(const char *, int, LWLockMode);
+ probe lwlock__acquire(const char *, LWLockMode);
+ probe lwlock__release(const char *);
+ probe lwlock__wait__start(const char *, LWLockMode);
+ probe lwlock__wait__done(const char *, LWLockMode);
+ probe lwlock__condacquire(const char *, LWLockMode);
+ probe lwlock__condacquire__fail(const char *, LWLockMode);
+ probe lwlock__acquire__or__wait(const char *, LWLockMode);
+ probe lwlock__acquire__or__wait__fail(const char *, LWLockMode);
probe lock__wait__start(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
probe lock__wait__done(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 5fcebc52fb..ca8ab7ba52 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -104,7 +104,6 @@ typedef struct SlruSharedData
/* LWLocks */
int lwlock_tranche_id;
- LWLockTranche lwlock_tranche;
char lwlock_tranche_name[SLRU_MAX_NAME_LENGTH];
LWLockPadded *buffer_locks;
} SlruSharedData;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 152ff06208..282f8aeccd 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -715,8 +715,7 @@ typedef enum BackendState
* Wait Classes
* ----------
*/
-#define PG_WAIT_LWLOCK_NAMED 0x01000000U
-#define PG_WAIT_LWLOCK_TRANCHE 0x02000000U
+#define PG_WAIT_LWLOCK 0x01000000U
#define PG_WAIT_LOCK 0x03000000U
#define PG_WAIT_BUFFER_PIN 0x04000000U
#define PG_WAIT_ACTIVITY 0x05000000U
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 9a2d86975c..db1c687e21 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -24,32 +24,6 @@
struct PGPROC;
-/*
- * Prior to PostgreSQL 9.4, every lightweight lock in the system was stored
- * in a single array. For convenience and for compatibility with past
- * releases, we still have a main array, but it's now also permissible to
- * store LWLocks elsewhere in the main shared memory segment or in a dynamic
- * shared memory segment. Each array of lwlocks forms a separate "tranche".
- *
- * It's occasionally necessary to identify a particular LWLock "by name"; e.g.
- * because we wish to report the lock to dtrace. We could store a name or
- * other identifying information in the lock itself, but since it's common
- * to have many nearly-identical locks (e.g. one per buffer) this would end
- * up wasting significant amounts of memory. Instead, each lwlock stores a
- * tranche ID which tells us which array it's part of. Based on that, we can
- * figure out where the lwlock lies within the array using the data structure
- * shown below; the lock is then identified based on the tranche name and
- * computed array index. We need the array stride because the array might not
- * be an array of lwlocks, but rather some larger data structure that includes
- * one or more lwlocks per element.
- */
-typedef struct LWLockTranche
-{
- const char *name;
- void *array_base;
- Size array_stride;
-} LWLockTranche;
-
/*
* Code outside of lwlock.c should not manipulate the contents of this
* structure directly, but we have to declare it here to allow LWLocks to be
@@ -118,8 +92,8 @@ extern char *MainLWLockNames[];
/* struct for storing named tranche information */
typedef struct NamedLWLockTranche
{
- LWLockTranche lwLockTranche;
int trancheId;
+ char *trancheName;
} NamedLWLockTranche;
extern PGDLLIMPORT NamedLWLockTranche *NamedLWLockTrancheArray;
@@ -199,9 +173,9 @@ extern LWLockPadded *GetNamedLWLockTranche(const char *tranche_name);
* There is another, more flexible method of obtaining lwlocks. First, call
* LWLockNewTrancheId just once to obtain a tranche ID; this allocates from
* a shared counter. Next, each individual process using the tranche should
- * call LWLockRegisterTranche() to associate that tranche ID with appropriate
- * metadata. Finally, LWLockInitialize should be called just once per lwlock,
- * passing the tranche ID as an argument.
+ * call LWLockRegisterTranche() to associate that tranche ID with a name.
+ * Finally, LWLockInitialize should be called just once per lwlock, passing
+ * the tranche ID as an argument.
*
* It may seem strange that each process using the tranche must register it
* separately, but dynamic shared memory segments aren't guaranteed to be
@@ -209,17 +183,18 @@ extern LWLockPadded *GetNamedLWLockTranche(const char *tranche_name);
* registration in the main shared memory segment wouldn't work for that case.
*/
extern int LWLockNewTrancheId(void);
-extern void LWLockRegisterTranche(int tranche_id, LWLockTranche *tranche);
+extern void LWLockRegisterTranche(int tranche_id, char *tranche_name);
extern void LWLockInitialize(LWLock *lock, int tranche_id);
/*
- * We reserve a few predefined tranche IDs. A call to LWLockNewTrancheId
- * will never return a value less than LWTRANCHE_FIRST_USER_DEFINED.
+ * Every tranche ID less than NUM_INDIVIDUAL_LWLOCKS is reserved; also,
+ * we reserve additional tranche IDs for builtin tranches not included in
+ * the set of individual LWLocks. A call to LWLockNewTrancheId will never
+ * return a value less than LWTRANCHE_FIRST_USER_DEFINED.
*/
typedef enum BuiltinTrancheIds
{
- LWTRANCHE_MAIN,
- LWTRANCHE_CLOG_BUFFERS,
+ LWTRANCHE_CLOG_BUFFERS = NUM_INDIVIDUAL_LWLOCKS,
LWTRANCHE_COMMITTS_BUFFERS,
LWTRANCHE_SUBTRANS_BUFFERS,
LWTRANCHE_MXACTOFFSET_BUFFERS,
--
cgit v1.2.3
From b645a05fc6112a4857ceac574d4aa24174a70417 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Sat, 17 Dec 2016 12:00:00 -0500
Subject: doc: Remove some trailing whitespace
Per discussion, we will not at this time remove trailing whitespace in
psql output displays where it is part of the actual psql output.
From: Vladimir Rusinov
---
doc/src/sgml/config.sgml | 2 +-
doc/src/sgml/parallel.sgml | 22 +++++++++++-----------
doc/src/sgml/ref/alter_table.sgml | 2 +-
doc/src/sgml/ref/insert.sgml | 2 +-
doc/src/sgml/ref/prepare.sgml | 2 +-
doc/src/sgml/ref/reindexdb.sgml | 2 +-
6 files changed, 16 insertions(+), 16 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 0fc4e57d90..3b614b6ecd 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1181,7 +1181,7 @@ include_dir 'conf.d'
it in plaintext. on> and off> are also accepted, as
aliases for md5> and plain>, respectively.
-
+
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index cf4c1c9c2a..bca4886b21 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -134,12 +134,12 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
+
The query writes any data or locks any database rows. If a query
contains a data-modifying operation either at the top level or within
a CTE, no parallel plans for that query will be generated. This is a
limitation of the current implementation which could be lifted in a
- future release.
+ future release.
@@ -153,7 +153,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
FOR x IN query LOOP .. END LOOP will never use a
parallel plan, because the parallel query system is unable to verify
that the code in the loop is safe to execute while parallel query is
- active.
+ active.
@@ -174,7 +174,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
query itself, that query will never use a parallel plan. This is a
limitation of the current implementation, but it may not be desirable
to remove this limitation, since it could result in a single query
- using a very large number of processes.
+ using a very large number of processes.
@@ -197,7 +197,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
+
No background workers can be obtained because of the limitation that
the total number of background workers cannot exceed
.
@@ -205,7 +205,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
+
No background workers can be obtained because of the limitation that
the total number of background workers launched for purposes of
parallel query cannot exceed .
@@ -213,7 +213,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
+
The client sends an Execute message with a non-zero fetch count.
See the discussion of the
extended query protocol.
@@ -228,7 +228,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
+
A prepared statement is executed using a CREATE TABLE .. AS
EXECUTE .. statement. This construct converts what otherwise
would have been a read-only operation into a read-write operation,
@@ -237,7 +237,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
-
+
The transaction isolation level is serializable. This situation
does not normally arise, because parallel query plans are not
generated when the transaction isolation level is serializable.
@@ -467,7 +467,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
transaction. If you write a function which does this, and this behavior
difference is important to you, mark such functions as
PARALLEL RESTRICTED
- to ensure that they execute only in the leader.
+ to ensure that they execute only in the leader.
@@ -475,7 +475,7 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
parallel-restricted functions or aggregates involved in the query in
order to obtain a superior plan. So, for example, if a WHERE>
clause applied to a particular table is parallel restricted, the query
- planner will not consider placing the scan of that table below a
+ planner will not consider placing the scan of that table below a
Gather> node. In some cases, it would be
possible (and perhaps even efficient) to include the scan of that table in
the parallel portion of the query and defer the evaluation of the
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 333b01db36..8ea6624147 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1121,7 +1121,7 @@ ALTER TABLE [ IF EXISTS ] name
Also, because selecting from the parent also selects from its descendents,
a constraint on the parent cannot be marked valid unless it is also marked
valid for those descendents. In all of these cases, ALTER TABLE
- ONLY will be rejected.
+ ONLY will be rejected.
diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml
index 00c984d8d5..9339826818 100644
--- a/doc/src/sgml/ref/insert.sgml
+++ b/doc/src/sgml/ref/insert.sgml
@@ -526,7 +526,7 @@ INSERT oidcount
-
+
Notes
diff --git a/doc/src/sgml/ref/prepare.sgml b/doc/src/sgml/ref/prepare.sgml
index 8efd51aaec..fea2196efe 100644
--- a/doc/src/sgml/ref/prepare.sgml
+++ b/doc/src/sgml/ref/prepare.sgml
@@ -73,7 +73,7 @@ PREPARE name [ ( reindexdb
connection-optionoption
-
+
--
cgit v1.2.3
From 3901fd70cc7ccacef1b0549a6835bb7d8dcaae43 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Mon, 19 Dec 2016 21:15:30 +0900
Subject: Support quorum-based synchronous replication.
This feature is also known as "quorum commit", especially in discussions
on pgsql-hackers.
This commit adds the following new syntaxes to the synchronous_standby_names
GUC. By using the FIRST and ANY keywords, users can specify the method to
choose synchronous standbys from the listed servers.
FIRST num_sync (standby_name [, ...])
ANY num_sync (standby_name [, ...])
The keyword FIRST specifies a priority-based synchronous replication
which was also available in 9.6 and earlier. This method makes transaction
commits wait until their WAL records are replicated to num_sync
synchronous standbys chosen based on their priorities.
The keyword ANY specifies a quorum-based synchronous replication
and makes transaction commits wait until their WAL records are
replicated to *at least* num_sync listed standbys. In this method,
the values of pg_stat_replication.sync_state for the listed standbys
are reported as "quorum". The priority is still assigned to each standby,
but not used in this method.
The existing syntaxes having neither FIRST nor ANY keyword are still
supported. They are the same as the new syntax with the FIRST keyword, i.e.,
a priority-based synchronous replication.
Author: Masahiko Sawada
Reviewed-By: Michael Paquier, Amit Kapila and me
Discussion:
Many thanks to the various individuals who were involved in
discussing and developing this feature.
---
doc/src/sgml/config.sgml | 68 ++++--
doc/src/sgml/high-availability.sgml | 44 +++-
doc/src/sgml/monitoring.sgml | 9 +-
src/backend/replication/Makefile | 2 +-
src/backend/replication/syncrep.c | 289 ++++++++++++++++++++++----
src/backend/replication/syncrep_gram.y | 14 +-
src/backend/replication/syncrep_scanner.l | 3 +
src/backend/replication/walsender.c | 12 +-
src/backend/utils/misc/postgresql.conf.sample | 3 +-
src/include/replication/syncrep.h | 7 +
src/test/recovery/t/007_sync_rep.pl | 33 ++-
11 files changed, 397 insertions(+), 87 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3b614b6ecd..1b98c416e0 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3054,41 +3054,71 @@ include_dir 'conf.d'
transactions waiting for commit will be allowed to proceed after
these standby servers confirm receipt of their data.
The synchronous standbys will be those whose names appear
- earlier in this list, and
+ in this list, and
that are both currently connected and streaming data in real-time
(as shown by a state of streaming in the
pg_stat_replication> view).
- Other standby servers appearing later in this list represent potential
- synchronous standbys. If any of the current synchronous
- standbys disconnects for whatever reason,
- it will be replaced immediately with the next-highest-priority standby.
- Specifying more than one standby name can allow very high availability.
+ Specifying more than one standby name can allow very high availability.
This parameter specifies a list of standby servers using
either of the following syntaxes:
-num_sync ( standby_name [, ...] )
+[FIRST] num_sync ( standby_name [, ...] )
+ANY num_sync ( standby_name [, ...] )
standby_name [, ...]
where num_sync is
the number of synchronous standbys that transactions need to
wait for replies from,
and standby_name
- is the name of a standby server. For example, a setting of
- 3 (s1, s2, s3, s4)> makes transaction commits wait
- until their WAL records are received by three higher-priority standbys
- chosen from standby servers s1>, s2>,
- s3> and s4>.
-
-
- The second syntax was used before PostgreSQL>
+ is the name of a standby server.
+ FIRST> and ANY> specify the method to choose
+ synchronous standbys from the listed servers.
+
+
+ The keyword FIRST>, coupled with
+ num_sync, specifies a
+ priority-based synchronous replication and makes transaction commits
+ wait until their WAL records are replicated to
+ num_sync synchronous
+ standbys chosen based on their priorities. For example, a setting of
+ FIRST 3 (s1, s2, s3, s4)> will cause each commit to wait for
+ replies from three higher-priority standbys chosen from standby servers
+ s1>, s2>, s3> and s4>.
+ The standbys whose names appear earlier in the list are given higher
+ priority and will be considered as synchronous. Other standby servers
+ appearing later in this list represent potential synchronous standbys.
+ If any of the current synchronous standbys disconnects for whatever
+ reason, it will be replaced immediately with the next-highest-priority
+ standby. The keyword FIRST> is optional.
+
+
+ The keyword ANY>, coupled with
+ num_sync, specifies a
+ quorum-based synchronous replication and makes transaction commits
+ wait until their WAL records are replicated to at least>
+ num_sync listed standbys.
+ For example, a setting of ANY 3 (s1, s2, s3, s4)> will cause
+ each commit to proceed as soon as at least any three standbys of
+ s1>, s2>, s3> and s4>
+ reply.
+
+
+ FIRST> and ANY> are case-insensitive. If these
+ keywords are used as the name of a standby server,
+ its standby_name must
+ be double-quoted.
+
+
+ The third syntax was used before PostgreSQL>
version 9.6 and is still supported. It's the same as the first syntax
- with num_sync equal to 1.
- For example, 1 (s1, s2)> and
- s1, s2> have the same meaning: either s1>
- or s2> is chosen as a synchronous standby.
+ with FIRST> and
+ num_sync equal to 1.
+ For example, FIRST 1 (s1, s2)> and s1, s2> have
+ the same meaning: either s1> or s2> is chosen
+ as a synchronous standby.
The name of a standby server for this purpose is the
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 6b89507c8c..a1a9532088 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1138,19 +1138,25 @@ primary_slot_name = 'node_a_slot'
as synchronous confirm receipt of their data. The number of synchronous
standbys that transactions must wait for replies from is specified in
synchronous_standby_names>. This parameter also specifies
- a list of standby names, which determines the priority of each standby
- for being chosen as a synchronous standby. The standbys whose names
- appear earlier in the list are given higher priority and will be considered
- as synchronous. Other standby servers appearing later in this list
- represent potential synchronous standbys. If any of the current
- synchronous standbys disconnects for whatever reason, it will be replaced
- immediately with the next-highest-priority standby.
+ a list of standby names and the method (FIRST> and
+ ANY>) to choose synchronous standbys from the listed ones.
- An example of synchronous_standby_names> for multiple
- synchronous standbys is:
+ The method FIRST> specifies a priority-based synchronous
+ replication and makes transaction commits wait until their WAL records are
+ replicated to the requested number of synchronous standbys chosen based on
+ their priorities. The standbys whose names appear earlier in the list are
+ given higher priority and will be considered as synchronous. Other standby
+ servers appearing later in this list represent potential synchronous
+ standbys. If any of the current synchronous standbys disconnects for
+ whatever reason, it will be replaced immediately with the
+ next-highest-priority standby.
+
+
+ An example of synchronous_standby_names> for
+ a priority-based multiple synchronous standbys is:
-synchronous_standby_names = '2 (s1, s2, s3)'
+synchronous_standby_names = 'FIRST 2 (s1, s2, s3)'
In this example, if four standby servers s1>, s2>,
s3> and s4> are running, the two standbys
@@ -1161,6 +1167,24 @@ synchronous_standby_names = '2 (s1, s2, s3)'
s2> fails. s4> is an asynchronous standby since
its name is not in the list.
+
+ The method ANY> specifies a quorum-based synchronous
+ replication and makes transaction commits wait until their WAL records
+ are replicated to at least> the requested number of
+ synchronous standbys in the list.
+
+
+ An example of synchronous_standby_names> for
+ a quorum-based multiple synchronous standbys is:
+
+ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
+
+ In this example, if four standby servers s1>, s2>,
+ s3> and s4> are running, transaction commits will
+ wait for replies from at least any two standbys of s1>,
+ s2> and s3>. s4> is an asynchronous
+ standby since its name is not in the list.
+
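As a hedged illustration (not part of the patch; standby names are
hypothetical), the setting can be changed at runtime, since
synchronous_standby_names only requires a configuration reload:

ALTER SYSTEM SET synchronous_standby_names = 'ANY 2 (s1, s2, s3)';
SELECT pg_reload_conf();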
The synchronous states of standby servers can be viewed using
the pg_stat_replication view.
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 5b58d2e84d..02bc8feca7 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1404,7 +1404,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
sync_priority>integer>Priority of this standby server for being chosen as the
- synchronous standby
+ synchronous standby in a priority-based synchronous replication.
+ This has no effect in a quorum-based synchronous replication.
sync_state>
@@ -1429,6 +1430,12 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
sync>: This standby server is synchronous.
+
+
+ quorum>: This standby server is considered a candidate
+ for quorum synchronous standbys.
+
+
diff --git a/src/backend/replication/Makefile b/src/backend/replication/Makefile
index c99717e0ae..da8bcf0471 100644
--- a/src/backend/replication/Makefile
+++ b/src/backend/replication/Makefile
@@ -26,7 +26,7 @@ repl_gram.o: repl_scanner.c
# syncrep_scanner is compiled as part of syncrep_gram
syncrep_gram.o: syncrep_scanner.c
-syncrep_scanner.c: FLEXFLAGS = -CF -p
+syncrep_scanner.c: FLEXFLAGS = -CF -p -i
syncrep_scanner.c: FLEX_NO_BACKUP=yes
# repl_gram.c, repl_scanner.c, syncrep_gram.c and syncrep_scanner.c
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index ce2009882d..9143c47f92 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -30,23 +30,34 @@
* searching through all waiters each time we receive a reply.
*
* In 9.5 or before only a single standby could be considered as
- * synchronous. In 9.6 we support multiple synchronous standbys.
- * The number of synchronous standbys that transactions must wait for
- * replies from is specified in synchronous_standby_names.
- * This parameter also specifies a list of standby names,
- * which determines the priority of each standby for being chosen as
- * a synchronous standby. The standbys whose names appear earlier
- * in the list are given higher priority and will be considered as
- * synchronous. Other standby servers appearing later in this list
- * represent potential synchronous standbys. If any of the current
- * synchronous standbys disconnects for whatever reason, it will be
- * replaced immediately with the next-highest-priority standby.
+ * synchronous. In 9.6 we added support for priority-based multiple
+ * synchronous standbys. In 10.0, quorum-based multiple synchronous standbys
+ * are also supported. The number of synchronous standbys that transactions
+ * must wait for replies from is specified in synchronous_standby_names.
+ * This parameter also specifies a list of standby names and the method
+ * (FIRST and ANY) to choose synchronous standbys from the listed ones.
+ *
+ * The method FIRST specifies a priority-based synchronous replication
+ * and makes transaction commits wait until their WAL records are
+ * replicated to the requested number of synchronous standbys chosen based
+ * on their priorities. The standbys whose names appear earlier in the list
+ * are given higher priority and will be considered as synchronous.
+ * Other standby servers appearing later in this list represent potential
+ * synchronous standbys. If any of the current synchronous standbys
+ * disconnects for whatever reason, it will be replaced immediately with
+ * the next-highest-priority standby.
+ *
+ * The method ANY specifies a quorum-based synchronous replication
+ * and makes transaction commits wait until their WAL records are
+ * replicated to at least the requested number of synchronous standbys
+ * in the list. All the standbys appearing in the list are considered as
+ * candidates for quorum synchronous standbys.
*
* Before the standbys chosen from synchronous_standby_names can
* become the synchronous standbys they must have caught up with
* the primary; that may take some time. Once caught up,
- * the current higher priority standbys which are considered as
- * synchronous at that moment will release waiters from the queue.
+ * the standbys which are considered as synchronous at that moment
+ * will release waiters from the queue.
*
* Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group
*
@@ -79,18 +90,29 @@ char *SyncRepStandbyNames;
static bool announce_next_takeover = true;
-static SyncRepConfigData *SyncRepConfig = NULL;
+SyncRepConfigData *SyncRepConfig = NULL;
static int SyncRepWaitMode = SYNC_REP_NO_WAIT;
static void SyncRepQueueInsert(int mode);
static void SyncRepCancelWait(void);
static int SyncRepWakeQueue(bool all, int mode);
-static bool SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr,
- XLogRecPtr *flushPtr,
- XLogRecPtr *applyPtr,
- bool *am_sync);
+static bool SyncRepGetSyncRecPtr(XLogRecPtr *writePtr,
+ XLogRecPtr *flushPtr,
+ XLogRecPtr *applyPtr,
+ bool *am_sync);
+static void SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr,
+ XLogRecPtr *flushPtr,
+ XLogRecPtr *applyPtr,
+ List *sync_standbys);
+static void SyncRepGetNthLatestSyncRecPtr(XLogRecPtr *writePtr,
+ XLogRecPtr *flushPtr,
+ XLogRecPtr *applyPtr,
+ List *sync_standbys, uint8 nth);
static int SyncRepGetStandbyPriority(void);
+static List *SyncRepGetSyncStandbysPriority(bool *am_sync);
+static List *SyncRepGetSyncStandbysQuorum(bool *am_sync);
+static int cmp_lsn(const void *a, const void *b);
#ifdef USE_ASSERT_CHECKING
static bool SyncRepQueueIsOrderedByLSN(int mode);
@@ -386,7 +408,7 @@ SyncRepReleaseWaiters(void)
XLogRecPtr writePtr;
XLogRecPtr flushPtr;
XLogRecPtr applyPtr;
- bool got_oldest;
+ bool got_recptr;
bool am_sync;
int numwrite = 0;
int numflush = 0;
@@ -413,11 +435,10 @@ SyncRepReleaseWaiters(void)
LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);
/*
- * Check whether we are a sync standby or not, and calculate the oldest
+ * Check whether we are a sync standby or not, and calculate the synced
* positions among all sync standbys.
*/
- got_oldest = SyncRepGetOldestSyncRecPtr(&writePtr, &flushPtr,
- &applyPtr, &am_sync);
+ got_recptr = SyncRepGetSyncRecPtr(&writePtr, &flushPtr, &applyPtr, &am_sync);
/*
* If we are managing a sync standby, though we weren't prior to this,
@@ -426,16 +447,22 @@ SyncRepReleaseWaiters(void)
if (announce_next_takeover && am_sync)
{
announce_next_takeover = false;
- ereport(LOG,
- (errmsg("standby \"%s\" is now a synchronous standby with priority %u",
- application_name, MyWalSnd->sync_standby_priority)));
+
+ if (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY)
+ ereport(LOG,
+ (errmsg("standby \"%s\" is now a synchronous standby with priority %u",
+ application_name, MyWalSnd->sync_standby_priority)));
+ else
+ ereport(LOG,
+ (errmsg("standby \"%s\" is now a candidate for quorum synchronous standby",
+ application_name)));
}
/*
* If the number of sync standbys is less than requested or we aren't
* managing a sync standby then just leave.
*/
- if (!got_oldest || !am_sync)
+ if (!got_recptr || !am_sync)
{
LWLockRelease(SyncRepLock);
announce_next_takeover = !am_sync;
@@ -471,21 +498,20 @@ SyncRepReleaseWaiters(void)
}
/*
- * Calculate the oldest Write, Flush and Apply positions among sync standbys.
+ * Calculate the synced Write, Flush and Apply positions among sync standbys.
*
* Return false if the number of sync standbys is less than
* synchronous_standby_names specifies. Otherwise return true and
- * store the oldest positions into *writePtr, *flushPtr and *applyPtr.
+ * store the positions into *writePtr, *flushPtr and *applyPtr.
*
* On return, *am_sync is set to true if this walsender is connecting to
* a sync standby. Otherwise it's set to false.
*/
static bool
-SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr,
+SyncRepGetSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr,
XLogRecPtr *applyPtr, bool *am_sync)
{
List *sync_standbys;
- ListCell *cell;
*writePtr = InvalidXLogRecPtr;
*flushPtr = InvalidXLogRecPtr;
@@ -508,12 +534,49 @@ SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr,
}
/*
- * Scan through all sync standbys and calculate the oldest Write, Flush
- * and Apply positions.
+ * In priority-based sync replication, the synced positions are the
+ * oldest ones among sync standbys. In quorum-based sync replication,
+ * they are the Nth-latest ones.
+ *
+ * SyncRepGetNthLatestSyncRecPtr() can also calculate the oldest positions,
+ * but we use SyncRepGetOldestSyncRecPtr() for that calculation because
+ * it's a bit more efficient.
+ *
+ * XXX If the numbers of current and requested sync standbys are the same,
+ * we can use SyncRepGetOldestSyncRecPtr() to calculate the synced
+ * positions even in a quorum-based sync replication.
+ */
+ if (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY)
+ {
+ SyncRepGetOldestSyncRecPtr(writePtr, flushPtr, applyPtr,
+ sync_standbys);
+ }
+ else
+ {
+ SyncRepGetNthLatestSyncRecPtr(writePtr, flushPtr, applyPtr,
+ sync_standbys, SyncRepConfig->num_sync);
+ }
+
+ list_free(sync_standbys);
+ return true;
+}
+
+/*
+ * Calculate the oldest Write, Flush and Apply positions among sync standbys.
+ */
+static void
+SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr,
+ XLogRecPtr *applyPtr, List *sync_standbys)
+{
+ ListCell *cell;
+
+ /*
+ * Scan through all sync standbys and calculate the oldest
+ * Write, Flush and Apply positions.
*/
- foreach(cell, sync_standbys)
+ foreach (cell, sync_standbys)
{
- WalSnd *walsnd = &WalSndCtl->walsnds[lfirst_int(cell)];
+ WalSnd *walsnd = &WalSndCtl->walsnds[lfirst_int(cell)];
XLogRecPtr write;
XLogRecPtr flush;
XLogRecPtr apply;
@@ -531,23 +594,163 @@ SyncRepGetOldestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr,
if (XLogRecPtrIsInvalid(*applyPtr) || *applyPtr > apply)
*applyPtr = apply;
}
+}
- list_free(sync_standbys);
- return true;
+/*
+ * Calculate the Nth latest Write, Flush and Apply positions among sync
+ * standbys.
+ */
+static void
+SyncRepGetNthLatestSyncRecPtr(XLogRecPtr *writePtr, XLogRecPtr *flushPtr,
+ XLogRecPtr *applyPtr, List *sync_standbys, uint8 nth)
+{
+ ListCell *cell;
+ XLogRecPtr *write_array;
+ XLogRecPtr *flush_array;
+ XLogRecPtr *apply_array;
+ int len;
+ int i = 0;
+
+ len = list_length(sync_standbys);
+ write_array = (XLogRecPtr *) palloc(sizeof(XLogRecPtr) * len);
+ flush_array = (XLogRecPtr *) palloc(sizeof(XLogRecPtr) * len);
+ apply_array = (XLogRecPtr *) palloc(sizeof(XLogRecPtr) * len);
+
+ foreach (cell, sync_standbys)
+ {
+ WalSnd *walsnd = &WalSndCtl->walsnds[lfirst_int(cell)];
+
+ SpinLockAcquire(&walsnd->mutex);
+ write_array[i] = walsnd->write;
+ flush_array[i] = walsnd->flush;
+ apply_array[i] = walsnd->apply;
+ SpinLockRelease(&walsnd->mutex);
+
+ i++;
+ }
+
+ qsort(write_array, len, sizeof(XLogRecPtr), cmp_lsn);
+ qsort(flush_array, len, sizeof(XLogRecPtr), cmp_lsn);
+ qsort(apply_array, len, sizeof(XLogRecPtr), cmp_lsn);
+
+ /* Get Nth latest Write, Flush, Apply positions */
+ *writePtr = write_array[nth - 1];
+ *flushPtr = flush_array[nth - 1];
+ *applyPtr = apply_array[nth - 1];
+
+ pfree(write_array);
+ pfree(flush_array);
+ pfree(apply_array);
+}
+
+/*
+ * Compare LSNs, for sorting an array in descending order.
+ */
+static int
+cmp_lsn(const void *a, const void *b)
+{
+ XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
+ XLogRecPtr lsn2 = *((const XLogRecPtr *) b);
+
+ if (lsn1 > lsn2)
+ return -1;
+ else if (lsn1 == lsn2)
+ return 0;
+ else
+ return 1;
}
/*
* Return the list of sync standbys, or NIL if no sync standby is connected.
*
- * If there are multiple standbys with the same priority,
- * the first one found is selected preferentially.
* The caller must hold SyncRepLock.
*
* On return, *am_sync is set to true if this walsender is connecting to
* a sync standby. Otherwise it's set to false.
*/
List *
-SyncRepGetSyncStandbys(bool *am_sync)
+SyncRepGetSyncStandbys(bool *am_sync)
+{
+ /* Set default result */
+ if (am_sync != NULL)
+ *am_sync = false;
+
+ /* Quick exit if sync replication is not requested */
+ if (SyncRepConfig == NULL)
+ return NIL;
+
+ return (SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY) ?
+ SyncRepGetSyncStandbysPriority(am_sync) :
+ SyncRepGetSyncStandbysQuorum(am_sync);
+}
+
+/*
+ * Return the list of all the candidates for quorum sync standbys,
+ * or NIL if no such standby is connected.
+ *
+ * The caller must hold SyncRepLock. This function must be called only in
+ * a quorum-based sync replication.
+ *
+ * On return, *am_sync is set to true if this walsender is connecting to
+ * a sync standby. Otherwise it's set to false.
+ */
+static List *
+SyncRepGetSyncStandbysQuorum(bool *am_sync)
+{
+ List *result = NIL;
+ int i;
+ volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
+ * rearrangement */
+
+ Assert(SyncRepConfig->syncrep_method == SYNC_REP_QUORUM);
+
+ for (i = 0; i < max_wal_senders; i++)
+ {
+ walsnd = &WalSndCtl->walsnds[i];
+
+ /* Must be active */
+ if (walsnd->pid == 0)
+ continue;
+
+ /* Must be streaming */
+ if (walsnd->state != WALSNDSTATE_STREAMING)
+ continue;
+
+ /* Must be synchronous */
+ if (walsnd->sync_standby_priority == 0)
+ continue;
+
+ /* Must have a valid flush position */
+ if (XLogRecPtrIsInvalid(walsnd->flush))
+ continue;
+
+ /*
+ * Consider this standby as a candidate for quorum sync standbys
+ * and append it to the result.
+ */
+ result = lappend_int(result, i);
+ if (am_sync != NULL && walsnd == MyWalSnd)
+ *am_sync = true;
+ }
+
+ return result;
+}
+
+/*
+ * Return the list of sync standbys chosen based on their priorities,
+ * or NIL if no sync standby is connected.
+ *
+ * If there are multiple standbys with the same priority,
+ * the first one found is selected preferentially.
+ *
+ * The caller must hold SyncRepLock. This function must be called only in
+ * a priority-based sync replication.
+ *
+ * On return, *am_sync is set to true if this walsender is connecting to
+ * sync standby. Otherwise it's set to false.
+ */
+static List *
+SyncRepGetSyncStandbysPriority(bool *am_sync)
{
List *result = NIL;
List *pending = NIL;
@@ -560,13 +763,7 @@ SyncRepGetSyncStandbys(bool *am_sync)
volatile WalSnd *walsnd; /* Use volatile pointer to prevent code
* rearrangement */
- /* Set default result */
- if (am_sync != NULL)
- *am_sync = false;
-
- /* Quick exit if sync replication is not requested */
- if (SyncRepConfig == NULL)
- return NIL;
+ Assert(SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY);
lowest_priority = SyncRepConfig->nmembers;
next_highest_priority = lowest_priority + 1;
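The quorum path above reduces to a small, generic technique: collect one LSN per candidate standby, sort the array in descending order, and take the (N-1)th element, which is the highest position that at least N standbys are known to have reached. A minimal standalone sketch of that calculation (hypothetical helper names; in the backend, XLogRecPtr is a 64-bit unsigned integer):

    #include <stdint.h>
    #include <stdlib.h>

    typedef uint64_t XLogRecPtr;

    /* qsort comparator: larger LSNs sort first (descending order) */
    static int
    cmp_lsn_desc(const void *a, const void *b)
    {
        XLogRecPtr lsn1 = *((const XLogRecPtr *) a);
        XLogRecPtr lsn2 = *((const XLogRecPtr *) b);

        if (lsn1 > lsn2)
            return -1;
        if (lsn1 < lsn2)
            return 1;
        return 0;
    }

    /*
     * Return the Nth-latest LSN among len candidates: the highest
     * position that at least nth of them have already reached.
     */
    static XLogRecPtr
    nth_latest_lsn(XLogRecPtr *lsns, int len, int nth)
    {
        qsort(lsns, len, sizeof(XLogRecPtr), cmp_lsn_desc);
        return lsns[nth - 1];
    }

With ANY 2 (s1, s2, s3) and flush positions {0/5000, 0/3000, 0/7000}, the sorted array is {0/7000, 0/5000, 0/3000} and the quorum flush position is 0/5000: two of the three standbys have flushed at least that far.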
diff --git a/src/backend/replication/syncrep_gram.y b/src/backend/replication/syncrep_gram.y
index 35c27760d1..281edc6f28 100644
--- a/src/backend/replication/syncrep_gram.y
+++ b/src/backend/replication/syncrep_gram.y
@@ -21,7 +21,7 @@ SyncRepConfigData *syncrep_parse_result;
char *syncrep_parse_error_msg;
static SyncRepConfigData *create_syncrep_config(const char *num_sync,
- List *members);
+ List *members, uint8 syncrep_method);
/*
* Bison doesn't allocate anything that needs to live across parser calls,
@@ -46,7 +46,7 @@ static SyncRepConfigData *create_syncrep_config(const char *num_sync,
SyncRepConfigData *config;
}
-%token NAME NUM JUNK
+%token NAME NUM JUNK ANY FIRST
%type result standby_config
%type standby_list
@@ -60,8 +60,10 @@ result:
;
standby_config:
- standby_list { $$ = create_syncrep_config("1", $1); }
- | NUM '(' standby_list ')' { $$ = create_syncrep_config($1, $3); }
+ standby_list { $$ = create_syncrep_config("1", $1, SYNC_REP_PRIORITY); }
+ | NUM '(' standby_list ')' { $$ = create_syncrep_config($1, $3, SYNC_REP_PRIORITY); }
+ | ANY NUM '(' standby_list ')' { $$ = create_syncrep_config($2, $4, SYNC_REP_QUORUM); }
+ | FIRST NUM '(' standby_list ')' { $$ = create_syncrep_config($2, $4, SYNC_REP_PRIORITY); }
;
standby_list:
@@ -75,9 +77,8 @@ standby_name:
;
%%
-
static SyncRepConfigData *
-create_syncrep_config(const char *num_sync, List *members)
+create_syncrep_config(const char *num_sync, List *members, uint8 syncrep_method)
{
SyncRepConfigData *config;
int size;
@@ -98,6 +99,7 @@ create_syncrep_config(const char *num_sync, List *members)
config->config_size = size;
config->num_sync = atoi(num_sync);
+ config->syncrep_method = syncrep_method;
config->nmembers = list_length(members);
ptr = config->member_names;
foreach(lc, members)
diff --git a/src/backend/replication/syncrep_scanner.l b/src/backend/replication/syncrep_scanner.l
index d20662ed03..261b30e976 100644
--- a/src/backend/replication/syncrep_scanner.l
+++ b/src/backend/replication/syncrep_scanner.l
@@ -64,6 +64,9 @@ xdinside [^"]+
%%
{space}+ { /* ignore */ }
+ANY { return ANY; }
+FIRST { return FIRST; }
+
{xdstart} {
initStringInfo(&xdbuf);
BEGIN(xd);
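Together with the Makefile change above (flex's -i switch builds a case-insensitive scanner), these two rules let the keywords be spelled ANY, any, First, and so on. A hedged sketch of how a caller might drive the scanner and grammar; the init/finish entry points follow the naming pattern of the other replication scanners and may differ in detail:

    int parse_rc;

    syncrep_scanner_init("any 2 (s1, s2, s3)");   /* lower case also accepted */
    parse_rc = syncrep_yyparse();
    syncrep_scanner_finish();

    if (parse_rc != 0 || syncrep_parse_result == NULL)
        elog(ERROR, "invalid synchronous_standby_names: %s",
             syncrep_parse_error_msg);

    /* For this input the grammar should build: method ANY, 2-of-3 */
    Assert(syncrep_parse_result->syncrep_method == SYNC_REP_QUORUM);
    Assert(syncrep_parse_result->num_sync == 2);
    Assert(syncrep_parse_result->nmembers == 3);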
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index d80bcc00a1..5cdb8a0ad6 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2868,12 +2868,20 @@ pg_stat_get_wal_senders(PG_FUNCTION_ARGS)
/*
* More easily understood version of standby state. This is purely
- * informational, not different from priority.
+ * informational.
+ *
+ * In quorum-based sync replication, the role of each standby
+ * listed in synchronous_standby_names can change very
+ * frequently. Any standby considered "sync" at one moment can
+ * be switched to "potential" at the next moment. So, it's
+ * basically useless to report "sync" or "potential" as their sync
+ * states. We report just "quorum" for them.
*/
if (priority == 0)
values[7] = CStringGetTextDatum("async");
else if (list_member_int(sync_standbys, i))
- values[7] = CStringGetTextDatum("sync");
+ values[7] = SyncRepConfig->syncrep_method == SYNC_REP_PRIORITY ?
+ CStringGetTextDatum("sync") : CStringGetTextDatum("quorum");
else
values[7] = CStringGetTextDatum("potential");
}
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 7f9acfda06..2c638b2c09 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -245,7 +245,8 @@
# These settings are ignored on a standby server.
#synchronous_standby_names = '' # standby servers that provide sync rep
- # number of sync standbys and comma-separated list of application_name
+ # method to choose sync standbys, number of sync standbys
+ # and comma-separated list of application_name
# from standby(s); '*' = all
#vacuum_defer_cleanup_age = 0 # number of xacts by which cleanup is delayed
diff --git a/src/include/replication/syncrep.h b/src/include/replication/syncrep.h
index e4e0e27371..9614b3163c 100644
--- a/src/include/replication/syncrep.h
+++ b/src/include/replication/syncrep.h
@@ -32,6 +32,10 @@
#define SYNC_REP_WAITING 1
#define SYNC_REP_WAIT_COMPLETE 2
+/* syncrep_method of SyncRepConfigData */
+#define SYNC_REP_PRIORITY 0
+#define SYNC_REP_QUORUM 1
+
/*
* Struct for the configuration of synchronous replication.
*
@@ -44,11 +48,14 @@ typedef struct SyncRepConfigData
int config_size; /* total size of this struct, in bytes */
int num_sync; /* number of sync standbys that we need to
* wait for */
+ uint8 syncrep_method; /* method to choose sync standbys */
int nmembers; /* number of members in the following list */
/* member_names contains nmembers consecutive nul-terminated C strings */
char member_names[FLEXIBLE_ARRAY_MEMBER];
} SyncRepConfigData;
+extern SyncRepConfigData *SyncRepConfig;
+
/* communication variables for parsing synchronous_standby_names GUC */
extern SyncRepConfigData *syncrep_parse_result;
extern char *syncrep_parse_error_msg;
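Note how SyncRepConfigData avoids pointers entirely: member_names packs the standby names end to end as nul-terminated strings, so the whole parse result can be copied around as one flat blob of config_size bytes. A minimal sketch of walking that flexible array, under the layout described in the struct comment:

    SyncRepConfigData *config = SyncRepConfig;
    char       *standby_name = config->member_names;
    int         i;

    for (i = 0; i < config->nmembers; i++)
    {
        elog(DEBUG1, "listed standby %d: \"%s\"", i + 1, standby_name);
        standby_name += strlen(standby_name) + 1;   /* step past the nul */
    }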
diff --git a/src/test/recovery/t/007_sync_rep.pl b/src/test/recovery/t/007_sync_rep.pl
index 0c872263ea..e11b4289d5 100644
--- a/src/test/recovery/t/007_sync_rep.pl
+++ b/src/test/recovery/t/007_sync_rep.pl
@@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 8;
+use Test::More tests => 11;
# Query checking sync_priority and sync_state of each standby
my $check_sql =
@@ -172,3 +172,34 @@ test_sync_state(
standby2|1|sync
standby4|1|potential),
'potential standby found earlier in array is promoted to sync');
+
+# Check that standby1 and standby2 are chosen as sync standbys
+# based on their priorities.
+test_sync_state(
+$node_master, qq(standby1|1|sync
+standby2|2|sync
+standby4|0|async),
+'priority-based sync replication specified by FIRST keyword',
+'FIRST 2(standby1, standby2)');
+
+# Check that all the listed standbys are considered as candidates
+# for sync standbys in a quorum-based sync replication.
+test_sync_state(
+$node_master, qq(standby1|1|quorum
+standby2|2|quorum
+standby4|0|async),
+'2 quorum and 1 async',
+'ANY 2(standby1, standby2)');
+
+# Start standby3, which will be considered in 'quorum' state.
+$node_standby_3->start;
+
+# Check that the setting of 'ANY 2(*)' chooses all standbys as
+# candidates for quorum sync standbys.
+test_sync_state(
+$node_master, qq(standby1|1|quorum
+standby2|1|quorum
+standby3|1|quorum
+standby4|1|quorum),
+'all standbys are considered as candidates for quorum sync standbys',
+'ANY 2(*)');
--
cgit v1.2.3
From e13029a5ce353574516c64fd1ec9c50201e705fd Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Mon, 19 Dec 2016 16:47:15 -0500
Subject: Provide a DSA area for all parallel queries.
This will allow future parallel query code to dynamically allocate
storage shared by all participants.
Thomas Munro, with assorted changes by me.
---
doc/src/sgml/monitoring.sgml | 6 ++++-
src/backend/executor/execParallel.c | 51 ++++++++++++++++++++++++++++++++++++-
src/include/executor/execParallel.h | 2 ++
src/include/nodes/execnodes.h | 3 +++
src/include/storage/lwlock.h | 1 +
5 files changed, 61 insertions(+), 2 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 02bc8feca7..1545f03656 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -818,7 +818,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
- LWLock>
+ LWLock>ShmemIndexLock>Waiting to find or allocate space in shared memory.
@@ -1069,6 +1069,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
predicate_lock_manager>Waiting to add or examine predicate lock information.
+
+ parallel_query_dsa>
+ Waiting for parallel query dynamic shared memory allocation lock.
+ Lock>relation>
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index f9c85989d8..8a6f844e35 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -34,6 +34,7 @@
#include "optimizer/planner.h"
#include "storage/spin.h"
#include "tcop/tcopprot.h"
+#include "utils/dsa.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
@@ -47,6 +48,7 @@
#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xE000000000000003)
#define PARALLEL_KEY_TUPLE_QUEUE UINT64CONST(0xE000000000000004)
#define PARALLEL_KEY_INSTRUMENTATION UINT64CONST(0xE000000000000005)
+#define PARALLEL_KEY_DSA UINT64CONST(0xE000000000000006)
#define PARALLEL_TUPLE_QUEUE_SIZE 65536
@@ -345,6 +347,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
int param_len;
int instrumentation_len = 0;
int instrument_offset = 0;
+ Size dsa_minsize = dsa_minimum_size();
/* Allocate object for return value. */
pei = palloc0(sizeof(ParallelExecutorInfo));
@@ -413,6 +416,10 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
shm_toc_estimate_keys(&pcxt->estimator, 1);
}
+ /* Estimate space for DSA area. */
+ shm_toc_estimate_chunk(&pcxt->estimator, dsa_minsize);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+
/* Everyone's had a chance to ask for space, so now create the DSM. */
InitializeParallelDSM(pcxt);
@@ -466,6 +473,29 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
pei->instrumentation = instrumentation;
}
+ /*
+ * Create a DSA area that can be used by the leader and all workers.
+ * (However, if we failed to create a DSM and are using private memory
+ * instead, then skip this.)
+ */
+ if (pcxt->seg != NULL)
+ {
+ char *area_space;
+
+ area_space = shm_toc_allocate(pcxt->toc, dsa_minsize);
+ shm_toc_insert(pcxt->toc, PARALLEL_KEY_DSA, area_space);
+ pei->area = dsa_create_in_place(area_space, dsa_minsize,
+ LWTRANCHE_PARALLEL_QUERY_DSA,
+ "parallel_query_dsa",
+ pcxt->seg);
+ }
+
+ /*
+ * Make the area available to executor nodes running in the leader. See
+ * also ParallelQueryMain, which makes it available to workers.
+ */
+ estate->es_query_dsa = pei->area;
+
/*
* Give parallel-aware nodes a chance to initialize their shared data.
* This also initializes the elements of instrumentation->ps_instrument,
@@ -571,6 +601,11 @@ ExecParallelFinish(ParallelExecutorInfo *pei)
void
ExecParallelCleanup(ParallelExecutorInfo *pei)
{
+ if (pei->area != NULL)
+ {
+ dsa_detach(pei->area);
+ pei->area = NULL;
+ }
if (pei->pcxt != NULL)
{
DestroyParallelContext(pei->pcxt);
@@ -728,6 +763,8 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
QueryDesc *queryDesc;
SharedExecutorInstrumentation *instrumentation;
int instrument_options = 0;
+ void *area_space;
+ dsa_area *area;
/* Set up DestReceiver, SharedExecutorInstrumentation, and QueryDesc. */
receiver = ExecParallelGetReceiver(seg, toc);
@@ -739,10 +776,21 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
/* Prepare to track buffer usage during query execution. */
InstrStartParallelQuery();
- /* Start up the executor, have it run the plan, and then shut it down. */
+ /* Attach to the dynamic shared memory area. */
+ area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA);
+ area = dsa_attach_in_place(area_space, seg);
+
+ /* Start up the executor */
ExecutorStart(queryDesc, 0);
+
+ /* Special executor initialization steps for parallel workers */
+ queryDesc->planstate->state->es_query_dsa = area;
ExecParallelInitializeWorker(queryDesc->planstate, toc);
+
+ /* Run the plan */
ExecutorRun(queryDesc, ForwardScanDirection, 0L);
+
+ /* Shut down the executor */
ExecutorFinish(queryDesc);
/* Report buffer usage during parallel execution. */
@@ -758,6 +806,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
ExecutorEnd(queryDesc);
/* Cleanup. */
+ dsa_detach(area);
FreeQueryDesc(queryDesc);
(*receiver->rDestroy) (receiver);
}
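With es_query_dsa in place, a parallel-aware node can allocate per-query shared storage without creating its own DSM segment. A hedged sketch of the intended pattern follows; SharedNodeState is a hypothetical per-node struct, and the dsa_pointer would in practice be handed to workers through the node's shm_toc entry. dsa_allocate returns a dsa_pointer valid in every attached process, and dsa_get_address converts it to a backend-local address:

    /* Leader, e.g. in a node's InitializeDSM callback: allocate and fill. */
    dsa_pointer shared_ptr;
    SharedNodeState *shared;    /* hypothetical per-node shared struct */

    shared_ptr = dsa_allocate(estate->es_query_dsa, sizeof(SharedNodeState));
    shared = (SharedNodeState *) dsa_get_address(estate->es_query_dsa,
                                                 shared_ptr);
    /* ... initialize *shared, publish shared_ptr to the workers ... */

    /* Worker, e.g. in the matching InitializeWorker callback: map it. */
    shared = (SharedNodeState *) dsa_get_address(estate->es_query_dsa,
                                                 shared_ptr);

Because leader and workers attach the area in place over the same DSM segment, all participants see the same bytes through their own local mappings.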
diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h
index f4c6d37a11..4bbee691a7 100644
--- a/src/include/executor/execParallel.h
+++ b/src/include/executor/execParallel.h
@@ -17,6 +17,7 @@
#include "nodes/execnodes.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"
+#include "utils/dsa.h"
typedef struct SharedExecutorInstrumentation SharedExecutorInstrumentation;
@@ -27,6 +28,7 @@ typedef struct ParallelExecutorInfo
BufferUsage *buffer_usage;
SharedExecutorInstrumentation *instrumentation;
shm_mq_handle **tqueue;
+ dsa_area *area;
bool finished;
} ParallelExecutorInfo;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 703604ab9d..5c3b8683f5 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -427,6 +427,9 @@ typedef struct EState
HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */
bool *es_epqTupleSet; /* true if EPQ tuple is provided */
bool *es_epqScanDone; /* true if EPQ tuple has been fetched */
+
+ /* The per-query shared memory area to use for parallel execution. */
+ struct dsa_area *es_query_dsa;
} EState;
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index db1c687e21..3ca4db0a72 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -210,6 +210,7 @@ typedef enum BuiltinTrancheIds
LWTRANCHE_BUFFER_MAPPING,
LWTRANCHE_LOCK_MANAGER,
LWTRANCHE_PREDICATE_LOCK_MANAGER,
+ LWTRANCHE_PARALLEL_QUERY_DSA,
LWTRANCHE_FIRST_USER_DEFINED
} BuiltinTrancheIds;
--
cgit v1.2.3
From 1753b1b027035029c2a2a1649065762fafbf63f3 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 20 Dec 2016 12:00:00 -0500
Subject: Add pg_sequence system catalog
Move sequence metadata (start, increment, etc.) into a proper system
catalog instead of storing it in the sequence heap object. This
separates the metadata from the sequence data. Sequence metadata is now
operated on transactionally by DDL commands, whereas previously
rollbacks of sequence-related DDL commands would be ignored.
Reviewed-by: Andreas Karlsson
---
doc/src/sgml/catalogs.sgml | 88 +++++-
src/backend/catalog/Makefile | 1 +
src/backend/catalog/dependency.c | 6 +
src/backend/catalog/information_schema.sql | 13 +-
src/backend/catalog/system_views.sql | 16 +-
src/backend/commands/sequence.c | 381 +++++++++++++++-----------
src/backend/utils/cache/syscache.c | 12 +
src/bin/pg_dump/pg_dump.c | 22 +-
src/include/catalog/catversion.h | 2 +-
src/include/catalog/indexing.h | 3 +
src/include/catalog/pg_sequence.h | 30 ++
src/include/commands/sequence.h | 29 +-
src/include/utils/syscache.h | 1 +
src/test/regress/expected/rules.out | 18 +-
src/test/regress/expected/sanity_check.out | 1 +
src/test/regress/expected/sequence.out | 33 ++-
src/test/regress/expected/updatable_views.out | 93 +++----
src/test/regress/sql/sequence.sql | 8 +
src/test/regress/sql/updatable_views.sql | 2 +-
19 files changed, 490 insertions(+), 269 deletions(-)
create mode 100644 src/include/catalog/pg_sequence.h
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 11c2019106..7a7bbde390 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -260,6 +260,11 @@
security labels on database objects
+
+ pg_sequence
+ information about sequences
+
+
pg_shdependdependencies on shared objects
@@ -1546,7 +1551,8 @@
The catalog pg_class catalogs tables and most
everything else that has columns or is otherwise similar to a
table. This includes indexes (but see also
- pg_index), sequences, views, materialized
+ pg_index), sequences (but see also
+ pg_sequence), views, materialized
views, composite types, and TOAST tables; see relkind>.
Below, when we mean all of these
kinds of objects we speak of relations. Not all
@@ -5587,6 +5593,86 @@
+
+ pg_sequence
+
+
+ pg_sequence
+
+
+
+ The catalog pg_sequence contains information about
+ sequences. Some of the information about sequences, such as the name and
+ the schema, is in pg_class.
+
+
+
+ pg_sequence> Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+ seqrelid
+ oid
+ pg_class.oid
+ The OID of the pg_class> entry for this sequence
+
+
+
+ seqstart
+ int8
+
+ Start value of the sequence
+
+
+
+ seqincrement
+ int8
+
+ Increment value of the sequence
+
+
+
+ seqmax
+ int8
+
+ Maximum value of the sequence
+
+
+
+ seqmin
+ int8
+
+ Minimum value of the sequence
+
+
+
+ seqcache
+ int8
+
+ Cache size of the sequence
+
+
+
+ seqcycle
+ bool
+
+ Whether the sequence cycles
+
+
+
+
+
+
pg_shdepend
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 2d5ac09bec..cd38c8ab3f 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -42,6 +42,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
+ pg_sequence.h \
toasting.h indexing.h \
)
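The newly created src/include/catalog/pg_sequence.h is not shown in this excerpt. Below is a hedged reconstruction of its core declarations, inferred from the columns documented above and the Anum_pg_sequence_* constants used in sequence.c later in this patch; the real header wraps the struct in the genbki CATALOG macro with a fixed OID, so treat this as illustrative only:

    /* Sketch of the pg_sequence row layout (assumed, not verbatim). */
    typedef struct FormData_pg_sequence
    {
        Oid         seqrelid;       /* pg_class OID of the sequence */
        int64       seqstart;       /* start value */
        int64       seqincrement;   /* increment */
        int64       seqmax;         /* maximum value */
        int64       seqmin;         /* minimum value */
        int64       seqcache;       /* cache size */
        bool        seqcycle;       /* whether the sequence cycles */
    } FormData_pg_sequence;

    typedef FormData_pg_sequence *Form_pg_sequence;

    #define Natts_pg_sequence               7
    #define Anum_pg_sequence_seqrelid       1
    #define Anum_pg_sequence_seqstart       2
    #define Anum_pg_sequence_seqincrement   3
    #define Anum_pg_sequence_seqmax         4
    #define Anum_pg_sequence_seqmin         5
    #define Anum_pg_sequence_seqcache       6
    #define Anum_pg_sequence_seqcycle       7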
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 0cdd1c5c6c..18a14bf146 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -66,6 +66,7 @@
#include "commands/proclang.h"
#include "commands/schemacmds.h"
#include "commands/seclabel.h"
+#include "commands/sequence.h"
#include "commands/trigger.h"
#include "commands/typecmds.h"
#include "nodes/nodeFuncs.h"
@@ -1114,6 +1115,11 @@ doDeletion(const ObjectAddress *object, int flags)
else
heap_drop_with_catalog(object->objectId);
}
+
+ /* for a sequence, in addition to dropping the heap, also
+ * delete the pg_sequence tuple */
+ if (relKind == RELKIND_SEQUENCE)
+ DeleteSequenceTuple(object->objectId);
break;
}
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 00550eb804..182d2d0674 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -1535,15 +1535,16 @@ CREATE VIEW sequences AS
CAST(64 AS cardinal_number) AS numeric_precision,
CAST(2 AS cardinal_number) AS numeric_precision_radix,
CAST(0 AS cardinal_number) AS numeric_scale,
- CAST(p.start_value AS character_data) AS start_value,
- CAST(p.minimum_value AS character_data) AS minimum_value,
- CAST(p.maximum_value AS character_data) AS maximum_value,
- CAST(p.increment AS character_data) AS increment,
- CAST(CASE WHEN p.cycle_option THEN 'YES' ELSE 'NO' END AS yes_or_no) AS cycle_option
- FROM pg_namespace nc, pg_class c, LATERAL pg_sequence_parameters(c.oid) p
+ CAST(s.seqstart AS character_data) AS start_value,
+ CAST(s.seqmin AS character_data) AS minimum_value,
+ CAST(s.seqmax AS character_data) AS maximum_value,
+ CAST(s.seqincrement AS character_data) AS increment,
+ CAST(CASE WHEN s.seqcycle THEN 'YES' ELSE 'NO' END AS yes_or_no) AS cycle_option
+ FROM pg_namespace nc, pg_class c, pg_sequence s
WHERE c.relnamespace = nc.oid
AND c.relkind = 'S'
AND (NOT pg_is_other_temp_schema(nc.oid))
+ AND c.oid = s.seqrelid
AND (pg_has_role(c.relowner, 'USAGE')
OR has_sequence_privilege(c.oid, 'SELECT, UPDATE, USAGE') );
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 48e7c4b7f9..649cef86c5 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -169,15 +169,15 @@ CREATE OR REPLACE VIEW pg_sequences AS
N.nspname AS schemaname,
C.relname AS sequencename,
pg_get_userbyid(C.relowner) AS sequenceowner,
- p.start_value AS start_value,
- p.minimum_value AS min_value,
- p.maximum_value AS max_value,
- p.increment AS increment_by,
- p.cycle_option AS cycle,
- p.cache_size AS cache_size,
+ S.seqstart AS start_value,
+ S.seqmin AS min_value,
+ S.seqmax AS max_value,
+ S.seqincrement AS increment_by,
+ S.seqcycle AS cycle,
+ S.seqcache AS cache_size,
pg_sequence_last_value(C.oid) AS last_value
- FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace),
- LATERAL pg_sequence_parameters(C.oid) p
+ FROM pg_sequence S JOIN pg_class C ON (C.oid = S.seqrelid)
+ LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE NOT pg_is_other_temp_schema(N.oid)
AND relkind = 'S';
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index d953b4408b..cdd32bc17e 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -22,8 +22,10 @@
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/dependency.h"
+#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
+#include "catalog/pg_sequence.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/sequence.h"
@@ -74,7 +76,7 @@ typedef struct SeqTableData
int64 cached; /* last value already cached for nextval */
/* if last != cached, we have not used up all the cached values */
int64 increment; /* copy of sequence's increment field */
- /* note that increment is zero until we first do read_seq_tuple() */
+ /* note that increment is zero until we first do nextval_internal() */
} SeqTableData;
typedef SeqTableData *SeqTable;
@@ -92,10 +94,11 @@ static int64 nextval_internal(Oid relid);
static Relation open_share_lock(SeqTable seq);
static void create_seq_hashtable(void);
static void init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel);
-static Form_pg_sequence read_seq_tuple(SeqTable elm, Relation rel,
- Buffer *buf, HeapTuple seqtuple);
+static Form_pg_sequence_data read_seq_tuple(Relation rel,
+ Buffer *buf, HeapTuple seqdatatuple);
static void init_params(ParseState *pstate, List *options, bool isInit,
- Form_pg_sequence new, List **owned_by);
+ Form_pg_sequence seqform,
+ Form_pg_sequence_data seqdataform, List **owned_by);
static void do_setval(Oid relid, int64 next, bool iscalled);
static void process_owned_by(Relation seqrel, List *owned_by);
@@ -107,7 +110,8 @@ static void process_owned_by(Relation seqrel, List *owned_by);
ObjectAddress
DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
{
- FormData_pg_sequence new;
+ FormData_pg_sequence seqform;
+ FormData_pg_sequence_data seqdataform;
List *owned_by;
CreateStmt *stmt = makeNode(CreateStmt);
Oid seqoid;
@@ -117,8 +121,9 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
TupleDesc tupDesc;
Datum value[SEQ_COL_LASTCOL];
bool null[SEQ_COL_LASTCOL];
+ Datum pgs_values[Natts_pg_sequence];
+ bool pgs_nulls[Natts_pg_sequence];
int i;
- NameData name;
/* Unlogged sequences are not implemented -- not clear if useful. */
if (seq->sequence->relpersistence == RELPERSISTENCE_UNLOGGED)
@@ -145,7 +150,7 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
}
/* Check and set all option values */
- init_params(pstate, seq->options, true, &new, &owned_by);
+ init_params(pstate, seq->options, true, &seqform, &seqdataform, &owned_by);
/*
* Create relation (and fill value[] and null[] for the tuple)
@@ -171,52 +176,16 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
switch (i)
{
- case SEQ_COL_NAME:
- coldef->typeName = makeTypeNameFromOid(NAMEOID, -1);
- coldef->colname = "sequence_name";
- namestrcpy(&name, seq->sequence->relname);
- value[i - 1] = NameGetDatum(&name);
- break;
case SEQ_COL_LASTVAL:
coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
coldef->colname = "last_value";
- value[i - 1] = Int64GetDatumFast(new.last_value);
- break;
- case SEQ_COL_STARTVAL:
- coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
- coldef->colname = "start_value";
- value[i - 1] = Int64GetDatumFast(new.start_value);
- break;
- case SEQ_COL_INCBY:
- coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
- coldef->colname = "increment_by";
- value[i - 1] = Int64GetDatumFast(new.increment_by);
- break;
- case SEQ_COL_MAXVALUE:
- coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
- coldef->colname = "max_value";
- value[i - 1] = Int64GetDatumFast(new.max_value);
- break;
- case SEQ_COL_MINVALUE:
- coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
- coldef->colname = "min_value";
- value[i - 1] = Int64GetDatumFast(new.min_value);
- break;
- case SEQ_COL_CACHE:
- coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
- coldef->colname = "cache_value";
- value[i - 1] = Int64GetDatumFast(new.cache_value);
+ value[i - 1] = Int64GetDatumFast(seqdataform.last_value);
break;
case SEQ_COL_LOG:
coldef->typeName = makeTypeNameFromOid(INT8OID, -1);
coldef->colname = "log_cnt";
value[i - 1] = Int64GetDatum((int64) 0);
break;
- case SEQ_COL_CYCLE:
- coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
- coldef->colname = "is_cycled";
- value[i - 1] = BoolGetDatum(new.is_cycled);
- break;
case SEQ_COL_CALLED:
coldef->typeName = makeTypeNameFromOid(BOOLOID, -1);
coldef->colname = "is_called";
@@ -251,6 +220,27 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
heap_close(rel, NoLock);
+ /* fill in pg_sequence */
+ rel = heap_open(SequenceRelationId, RowExclusiveLock);
+ tupDesc = RelationGetDescr(rel);
+
+ memset(pgs_nulls, 0, sizeof(pgs_nulls));
+
+ pgs_values[Anum_pg_sequence_seqrelid - 1] = ObjectIdGetDatum(seqoid);
+ pgs_values[Anum_pg_sequence_seqstart - 1] = Int64GetDatumFast(seqform.seqstart);
+ pgs_values[Anum_pg_sequence_seqincrement - 1] = Int64GetDatumFast(seqform.seqincrement);
+ pgs_values[Anum_pg_sequence_seqmax - 1] = Int64GetDatumFast(seqform.seqmax);
+ pgs_values[Anum_pg_sequence_seqmin - 1] = Int64GetDatumFast(seqform.seqmin);
+ pgs_values[Anum_pg_sequence_seqcache - 1] = Int64GetDatumFast(seqform.seqcache);
+ pgs_values[Anum_pg_sequence_seqcycle - 1] = BoolGetDatum(seqform.seqcycle);
+
+ tuple = heap_form_tuple(tupDesc, pgs_values, pgs_nulls);
+ simple_heap_insert(rel, tuple);
+ CatalogUpdateIndexes(rel, tuple);
+
+ heap_freetuple(tuple);
+ heap_close(rel, RowExclusiveLock);
+
return address;
}
@@ -271,10 +261,13 @@ ResetSequence(Oid seq_relid)
{
Relation seq_rel;
SeqTable elm;
- Form_pg_sequence seq;
+ Form_pg_sequence_data seq;
Buffer buf;
- HeapTupleData seqtuple;
+ HeapTupleData seqdatatuple;
HeapTuple tuple;
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ int64 startv;
/*
* Read the old sequence. This does a bit more work than really
* necessary, but it's simple, and we do want to double-check that it's
* indeed a sequence.
* indeed a sequence.
*/
init_sequence(seq_relid, &elm, &seq_rel);
- (void) read_seq_tuple(elm, seq_rel, &buf, &seqtuple);
+ (void) read_seq_tuple(seq_rel, &buf, &seqdatatuple);
+
+ pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(seq_relid));
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", seq_relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ startv = pgsform->seqstart;
+ ReleaseSysCache(pgstuple);
/*
* Copy the existing sequence tuple.
*/
- tuple = heap_copytuple(&seqtuple);
+ tuple = heap_copytuple(&seqdatatuple);
/* Now we're done with the old page */
UnlockReleaseBuffer(buf);
@@ -296,8 +296,8 @@ ResetSequence(Oid seq_relid)
* Modify the copied tuple to execute the restart (compare the RESTART
* action in AlterSequence)
*/
- seq = (Form_pg_sequence) GETSTRUCT(tuple);
- seq->last_value = seq->start_value;
+ seq = (Form_pg_sequence_data) GETSTRUCT(tuple);
+ seq->last_value = startv;
seq->is_called = false;
seq->log_cnt = 0;
@@ -410,11 +410,14 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
SeqTable elm;
Relation seqrel;
Buffer buf;
- HeapTupleData seqtuple;
- Form_pg_sequence seq;
- FormData_pg_sequence new;
+ HeapTupleData seqdatatuple;
+ Form_pg_sequence seqform;
+ Form_pg_sequence_data seqdata;
+ FormData_pg_sequence_data newseqdata;
List *owned_by;
ObjectAddress address;
+ Relation rel;
+ HeapTuple tuple;
/* Open and lock sequence. */
relid = RangeVarGetRelid(stmt->sequence, AccessShareLock, stmt->missing_ok);
@@ -434,13 +437,22 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
stmt->sequence->relname);
/* lock page' buffer and read tuple into new sequence structure */
- seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
+ seqdata = read_seq_tuple(seqrel, &buf, &seqdatatuple);
/* Copy old values of options into workspace */
- memcpy(&new, seq, sizeof(FormData_pg_sequence));
+ memcpy(&newseqdata, seqdata, sizeof(FormData_pg_sequence_data));
+
+ rel = heap_open(SequenceRelationId, RowExclusiveLock);
+ tuple = SearchSysCacheCopy1(SEQRELID,
+ ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for sequence %u",
+ relid);
+
+ seqform = (Form_pg_sequence) GETSTRUCT(tuple);
/* Check and set new values */
- init_params(pstate, stmt->options, false, &new, &owned_by);
+ init_params(pstate, stmt->options, false, seqform, &newseqdata, &owned_by);
/* Clear local cache so that we don't think we have cached numbers */
/* Note that we do not change the currval() state */
@@ -453,7 +465,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
/* Now okay to update the on-disk tuple */
START_CRIT_SECTION();
- memcpy(seq, &new, sizeof(FormData_pg_sequence));
+ memcpy(seqdata, &newseqdata, sizeof(FormData_pg_sequence_data));
MarkBufferDirty(buf);
@@ -470,7 +482,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
xlrec.node = seqrel->rd_node;
XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
- XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
+ XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
@@ -491,9 +503,30 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
relation_close(seqrel, NoLock);
+ simple_heap_update(rel, &tuple->t_self, tuple);
+ CatalogUpdateIndexes(rel, tuple);
+ heap_close(rel, RowExclusiveLock);
+
return address;
}
+void
+DeleteSequenceTuple(Oid relid)
+{
+ Relation rel;
+ HeapTuple tuple;
+
+ rel = heap_open(SequenceRelationId, RowExclusiveLock);
+
+ tuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+
+ simple_heap_delete(rel, &tuple->t_self);
+
+ ReleaseSysCache(tuple);
+ heap_close(rel, RowExclusiveLock);
+}
/*
* Note: nextval with a text argument is no longer exported as a pg_proc
@@ -537,8 +570,10 @@ nextval_internal(Oid relid)
Relation seqrel;
Buffer buf;
Page page;
- HeapTupleData seqtuple;
- Form_pg_sequence seq;
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ HeapTupleData seqdatatuple;
+ Form_pg_sequence_data seq;
int64 incby,
maxv,
minv,
@@ -549,6 +584,7 @@ nextval_internal(Oid relid)
int64 result,
next,
rescnt = 0;
+ bool cycle;
bool logit = false;
/* open and AccessShareLock sequence */
@@ -582,15 +618,24 @@ nextval_internal(Oid relid)
return elm->last;
}
+ pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ incby = pgsform->seqincrement;
+ maxv = pgsform->seqmax;
+ minv = pgsform->seqmin;
+ cache = pgsform->seqcache;
+ cycle = pgsform->seqcycle;
+ ReleaseSysCache(pgstuple);
+
/* lock page' buffer and read tuple */
- seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
+ seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
page = BufferGetPage(buf);
+ elm->increment = incby;
last = next = result = seq->last_value;
- incby = seq->increment_by;
- maxv = seq->max_value;
- minv = seq->min_value;
- fetch = cache = seq->cache_value;
+ fetch = cache;
log = seq->log_cnt;
if (!seq->is_called)
@@ -641,7 +686,7 @@ nextval_internal(Oid relid)
{
if (rescnt > 0)
break; /* stop fetching */
- if (!seq->is_cycled)
+ if (!cycle)
{
char buf[100];
@@ -664,7 +709,7 @@ nextval_internal(Oid relid)
{
if (rescnt > 0)
break; /* stop fetching */
- if (!seq->is_cycled)
+ if (!cycle)
{
char buf[100];
@@ -747,7 +792,7 @@ nextval_internal(Oid relid)
xlrec.node = seqrel->rd_node;
XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
- XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
+ XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
@@ -853,8 +898,12 @@ do_setval(Oid relid, int64 next, bool iscalled)
SeqTable elm;
Relation seqrel;
Buffer buf;
- HeapTupleData seqtuple;
- Form_pg_sequence seq;
+ HeapTupleData seqdatatuple;
+ Form_pg_sequence_data seq;
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
+ int64 maxv,
+ minv;
/* open and AccessShareLock sequence */
init_sequence(relid, &elm, &seqrel);
@@ -865,6 +914,14 @@ do_setval(Oid relid, int64 next, bool iscalled)
errmsg("permission denied for sequence %s",
RelationGetRelationName(seqrel))));
+ pgstuple = SearchSysCache1(SEQRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ maxv = pgsform->seqmax;
+ minv = pgsform->seqmin;
+ ReleaseSysCache(pgstuple);
+
/* read-only transactions may only modify temp sequences */
if (!seqrel->rd_islocaltemp)
PreventCommandIfReadOnly("setval()");
@@ -877,17 +934,17 @@ do_setval(Oid relid, int64 next, bool iscalled)
PreventCommandIfParallelMode("setval()");
/* lock page' buffer and read tuple */
- seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
+ seq = read_seq_tuple(seqrel, &buf, &seqdatatuple);
- if ((next < seq->min_value) || (next > seq->max_value))
+ if ((next < minv) || (next > maxv))
{
char bufv[100],
bufm[100],
bufx[100];
snprintf(bufv, sizeof(bufv), INT64_FORMAT, next);
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, seq->min_value);
- snprintf(bufx, sizeof(bufx), INT64_FORMAT, seq->max_value);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, minv);
+ snprintf(bufx, sizeof(bufx), INT64_FORMAT, maxv);
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("setval: value %s is out of bounds for sequence \"%s\" (%s..%s)",
@@ -930,7 +987,7 @@ do_setval(Oid relid, int64 next, bool iscalled)
xlrec.node = seqrel->rd_node;
XLogRegisterData((char *) &xlrec, sizeof(xl_seq_rec));
- XLogRegisterData((char *) seqtuple.t_data, seqtuple.t_len);
+ XLogRegisterData((char *) seqdatatuple.t_data, seqdatatuple.t_len);
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG);
@@ -1064,7 +1121,7 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
elm->filenode = InvalidOid;
elm->lxid = InvalidLocalTransactionId;
elm->last_valid = false;
- elm->last = elm->cached = elm->increment = 0;
+ elm->last = elm->cached = 0;
}
/*
@@ -1099,18 +1156,18 @@ init_sequence(Oid relid, SeqTable *p_elm, Relation *p_rel)
* Given an opened sequence relation, lock the page buffer and find the tuple
*
* *buf receives the reference to the pinned-and-ex-locked buffer
- * *seqtuple receives the reference to the sequence tuple proper
+ * *seqdatatuple receives the reference to the sequence tuple proper
* (this arg should point to a local variable of type HeapTupleData)
*
* Function's return value points to the data payload of the tuple
*/
-static Form_pg_sequence
-read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
+static Form_pg_sequence_data
+read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple)
{
Page page;
ItemId lp;
sequence_magic *sm;
- Form_pg_sequence seq;
+ Form_pg_sequence_data seq;
*buf = ReadBuffer(rel, 0);
LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
@@ -1125,9 +1182,9 @@ read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
lp = PageGetItemId(page, FirstOffsetNumber);
Assert(ItemIdIsNormal(lp));
- /* Note we currently only bother to set these two fields of *seqtuple */
- seqtuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
- seqtuple->t_len = ItemIdGetLength(lp);
+ /* Note we currently only bother to set these two fields of *seqdatatuple */
+ seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ seqdatatuple->t_len = ItemIdGetLength(lp);
/*
* Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on
* a sequence, which would leave a non-frozen XID in the sequence tuple's
* xmax, which eventually leads to clog access failures or worse. If we
* see this has happened, clean up after it. We treat this like a hint
* bit update, ie, don't bother to WAL-log it, since we can certainly do
* bit update, ie, don't bother to WAL-log it, since we can certainly do
* this again if the update gets lost.
*/
- Assert(!(seqtuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
- if (HeapTupleHeaderGetRawXmax(seqtuple->t_data) != InvalidTransactionId)
+ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId)
{
- HeapTupleHeaderSetXmax(seqtuple->t_data, InvalidTransactionId);
- seqtuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
- seqtuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
+ HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId);
+ seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED;
+ seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID;
MarkBufferDirtyHint(*buf, true);
}
- seq = (Form_pg_sequence) GETSTRUCT(seqtuple);
-
- /* this is a handy place to update our copy of the increment */
- elm->increment = seq->increment_by;
+ seq = (Form_pg_sequence_data) GETSTRUCT(seqdatatuple);
return seq;
}
@@ -1164,7 +1218,8 @@ read_seq_tuple(SeqTable elm, Relation rel, Buffer *buf, HeapTuple seqtuple)
*/
static void
init_params(ParseState *pstate, List *options, bool isInit,
- Form_pg_sequence new, List **owned_by)
+ Form_pg_sequence seqform,
+ Form_pg_sequence_data seqdataform, List **owned_by)
{
DefElem *start_value = NULL;
DefElem *restart_value = NULL;
@@ -1263,69 +1318,69 @@ init_params(ParseState *pstate, List *options, bool isInit,
* would affect future nextval allocations.
*/
if (isInit)
- new->log_cnt = 0;
+ seqdataform->log_cnt = 0;
/* INCREMENT BY */
if (increment_by != NULL)
{
- new->increment_by = defGetInt64(increment_by);
- if (new->increment_by == 0)
+ seqform->seqincrement = defGetInt64(increment_by);
+ if (seqform->seqincrement == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("INCREMENT must not be zero")));
- new->log_cnt = 0;
+ seqdataform->log_cnt = 0;
}
else if (isInit)
- new->increment_by = 1;
+ seqform->seqincrement = 1;
/* CYCLE */
if (is_cycled != NULL)
{
- new->is_cycled = intVal(is_cycled->arg);
- Assert(BoolIsValid(new->is_cycled));
- new->log_cnt = 0;
+ seqform->seqcycle = intVal(is_cycled->arg);
+ Assert(BoolIsValid(seqform->seqcycle));
+ seqdataform->log_cnt = 0;
}
else if (isInit)
- new->is_cycled = false;
+ seqform->seqcycle = false;
/* MAXVALUE (null arg means NO MAXVALUE) */
if (max_value != NULL && max_value->arg)
{
- new->max_value = defGetInt64(max_value);
- new->log_cnt = 0;
+ seqform->seqmax = defGetInt64(max_value);
+ seqdataform->log_cnt = 0;
}
else if (isInit || max_value != NULL)
{
- if (new->increment_by > 0)
- new->max_value = SEQ_MAXVALUE; /* ascending seq */
+ if (seqform->seqincrement > 0)
+ seqform->seqmax = SEQ_MAXVALUE; /* ascending seq */
else
- new->max_value = -1; /* descending seq */
- new->log_cnt = 0;
+ seqform->seqmax = -1; /* descending seq */
+ seqdataform->log_cnt = 0;
}
/* MINVALUE (null arg means NO MINVALUE) */
if (min_value != NULL && min_value->arg)
{
- new->min_value = defGetInt64(min_value);
- new->log_cnt = 0;
+ seqform->seqmin = defGetInt64(min_value);
+ seqdataform->log_cnt = 0;
}
else if (isInit || min_value != NULL)
{
- if (new->increment_by > 0)
- new->min_value = 1; /* ascending seq */
+ if (seqform->seqincrement > 0)
+ seqform->seqmin = 1; /* ascending seq */
else
- new->min_value = SEQ_MINVALUE; /* descending seq */
- new->log_cnt = 0;
+ seqform->seqmin = SEQ_MINVALUE; /* descending seq */
+ seqdataform->log_cnt = 0;
}
/* crosscheck min/max */
- if (new->min_value >= new->max_value)
+ if (seqform->seqmin >= seqform->seqmax)
{
char bufm[100],
bufx[100];
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value);
- snprintf(bufx, sizeof(bufx), INT64_FORMAT, new->max_value);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, seqform->seqmin);
+ snprintf(bufx, sizeof(bufx), INT64_FORMAT, seqform->seqmax);
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("MINVALUE (%s) must be less than MAXVALUE (%s)",
@@ -1334,35 +1389,35 @@ init_params(ParseState *pstate, List *options, bool isInit,
/* START WITH */
if (start_value != NULL)
- new->start_value = defGetInt64(start_value);
+ seqform->seqstart = defGetInt64(start_value);
else if (isInit)
{
- if (new->increment_by > 0)
- new->start_value = new->min_value; /* ascending seq */
+ if (seqform->seqincrement > 0)
+ seqform->seqstart = seqform->seqmin; /* ascending seq */
else
- new->start_value = new->max_value; /* descending seq */
+ seqform->seqstart = seqform->seqmax; /* descending seq */
}
/* crosscheck START */
- if (new->start_value < new->min_value)
+ if (seqform->seqstart < seqform->seqmin)
{
char bufs[100],
bufm[100];
- snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->start_value);
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value);
+ snprintf(bufs, sizeof(bufs), INT64_FORMAT, seqform->seqstart);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, seqform->seqmin);
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("START value (%s) cannot be less than MINVALUE (%s)",
bufs, bufm)));
}
- if (new->start_value > new->max_value)
+ if (seqform->seqstart > seqform->seqmax)
{
char bufs[100],
bufm[100];
- snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->start_value);
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value);
+ snprintf(bufs, sizeof(bufs), INT64_FORMAT, seqform->seqstart);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, seqform->seqmax);
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("START value (%s) cannot be greater than MAXVALUE (%s)",
@@ -1373,38 +1428,38 @@ init_params(ParseState *pstate, List *options, bool isInit,
if (restart_value != NULL)
{
if (restart_value->arg != NULL)
- new->last_value = defGetInt64(restart_value);
+ seqdataform->last_value = defGetInt64(restart_value);
else
- new->last_value = new->start_value;
- new->is_called = false;
- new->log_cnt = 0;
+ seqdataform->last_value = seqform->seqstart;
+ seqdataform->is_called = false;
+ seqdataform->log_cnt = 0;
}
else if (isInit)
{
- new->last_value = new->start_value;
- new->is_called = false;
+ seqdataform->last_value = seqform->seqstart;
+ seqdataform->is_called = false;
}
/* crosscheck RESTART (or current value, if changing MIN/MAX) */
- if (new->last_value < new->min_value)
+ if (seqdataform->last_value < seqform->seqmin)
{
char bufs[100],
bufm[100];
- snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->last_value);
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->min_value);
+ snprintf(bufs, sizeof(bufs), INT64_FORMAT, seqdataform->last_value);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, seqform->seqmin);
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("RESTART value (%s) cannot be less than MINVALUE (%s)",
bufs, bufm)));
}
- if (new->last_value > new->max_value)
+ if (seqdataform->last_value > seqform->seqmax)
{
char bufs[100],
bufm[100];
- snprintf(bufs, sizeof(bufs), INT64_FORMAT, new->last_value);
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, new->max_value);
+ snprintf(bufs, sizeof(bufs), INT64_FORMAT, seqdataform->last_value);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, seqform->seqmax);
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("RESTART value (%s) cannot be greater than MAXVALUE (%s)",
@@ -1414,21 +1469,21 @@ init_params(ParseState *pstate, List *options, bool isInit,
/* CACHE */
if (cache_value != NULL)
{
- new->cache_value = defGetInt64(cache_value);
- if (new->cache_value <= 0)
+ seqform->seqcache = defGetInt64(cache_value);
+ if (seqform->seqcache <= 0)
{
char buf[100];
- snprintf(buf, sizeof(buf), INT64_FORMAT, new->cache_value);
+ snprintf(buf, sizeof(buf), INT64_FORMAT, seqform->seqcache);
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("CACHE (%s) must be greater than zero",
buf)));
}
- new->log_cnt = 0;
+ seqdataform->log_cnt = 0;
}
else if (isInit)
- new->cache_value = 1;
+ seqform->seqcache = 1;
}
/*
@@ -1528,7 +1583,7 @@ process_owned_by(Relation seqrel, List *owned_by)
/*
- * Return sequence parameters, for use by information schema
+ * Return sequence parameters (formerly for use by information schema)
*/
Datum
pg_sequence_parameters(PG_FUNCTION_ARGS)
@@ -1537,20 +1592,14 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
TupleDesc tupdesc;
Datum values[6];
bool isnull[6];
- SeqTable elm;
- Relation seqrel;
- Buffer buf;
- HeapTupleData seqtuple;
- Form_pg_sequence seq;
-
- /* open and AccessShareLock sequence */
- init_sequence(relid, &elm, &seqrel);
+ HeapTuple pgstuple;
+ Form_pg_sequence pgsform;
if (pg_class_aclcheck(relid, GetUserId(), ACL_SELECT | ACL_UPDATE | ACL_USAGE) != ACLCHECK_OK)
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied for sequence %s",
- RelationGetRelationName(seqrel))));
+ get_rel_name(relid))));
tupdesc = CreateTemplateTupleDesc(6, false);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "start_value",
@@ -1570,17 +1619,19 @@ pg_sequence_parameters(PG_FUNCTION_ARGS)
memset(isnull, 0, sizeof(isnull));
- seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
+ pgstuple = SearchSysCache1(SEQRELID, relid);
+ if (!HeapTupleIsValid(pgstuple))
+ elog(ERROR, "cache lookup failed for sequence %u", relid);
+ pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
- values[0] = Int64GetDatum(seq->start_value);
- values[1] = Int64GetDatum(seq->min_value);
- values[2] = Int64GetDatum(seq->max_value);
- values[3] = Int64GetDatum(seq->increment_by);
- values[4] = BoolGetDatum(seq->is_cycled);
- values[5] = Int64GetDatum(seq->cache_value);
+ values[0] = Int64GetDatum(pgsform->seqstart);
+ values[1] = Int64GetDatum(pgsform->seqmin);
+ values[2] = Int64GetDatum(pgsform->seqmax);
+ values[3] = Int64GetDatum(pgsform->seqincrement);
+ values[4] = BoolGetDatum(pgsform->seqcycle);
+ values[5] = Int64GetDatum(pgsform->seqcache);
- UnlockReleaseBuffer(buf);
- relation_close(seqrel, NoLock);
+ ReleaseSysCache(pgstuple);
return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, isnull));
}
@@ -1598,7 +1649,7 @@ pg_sequence_last_value(PG_FUNCTION_ARGS)
Relation seqrel;
Buffer buf;
HeapTupleData seqtuple;
- Form_pg_sequence seq;
+ Form_pg_sequence_data seq;
bool is_called;
int64 result;
@@ -1611,7 +1662,7 @@ pg_sequence_last_value(PG_FUNCTION_ARGS)
errmsg("permission denied for sequence %s",
RelationGetRelationName(seqrel))));
- seq = read_seq_tuple(elm, seqrel, &buf, &seqtuple);
+ seq = read_seq_tuple(seqrel, &buf, &seqtuple);
is_called = seq->is_called;
result = seq->last_value;
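The crosschecks in init_params above surface as ordinary user-facing
errors. For example (a sketch; the error text follows the errmsg strings
in the hunks above, and the sequence name is hypothetical):

    -- START must lie within [MINVALUE, MAXVALUE]
    CREATE SEQUENCE bad_seq START 0 MINVALUE 1;
    -- ERROR:  START value (0) cannot be less than MINVALUE (1)

    -- CACHE must be strictly positive
    CREATE SEQUENCE bad_seq CACHE 0;
    -- ERROR:  CACHE (0) must be greater than zero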
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index a3e0517b94..e87fe0ef54 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -53,6 +53,7 @@
#include "catalog/pg_range.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_seclabel.h"
+#include "catalog/pg_sequence.h"
#include "catalog/pg_shdepend.h"
#include "catalog/pg_shdescription.h"
#include "catalog/pg_shseclabel.h"
@@ -668,6 +669,17 @@ static const struct cachedesc cacheinfo[] = {
},
8
},
+ {SequenceRelationId, /* SEQRELID */
+ SequenceRelidIndexId,
+ 1,
+ {
+ Anum_pg_sequence_seqrelid,
+ 0,
+ 0,
+ 0
+ },
+ 32
+ },
{StatisticRelationId, /* STATRELATTINH */
StatisticRelidAttnumInhIndexId,
3,
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 7949aad367..c5adc9da38 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -15351,7 +15351,27 @@ dumpSequence(Archive *fout, TableInfo *tbinfo)
snprintf(bufm, sizeof(bufm), INT64_FORMAT, SEQ_MINVALUE);
snprintf(bufx, sizeof(bufx), INT64_FORMAT, SEQ_MAXVALUE);
- if (fout->remoteVersion >= 80400)
+ if (fout->remoteVersion >= 100000)
+ {
+ appendPQExpBuffer(query,
+ "SELECT relname, "
+ "seqstart, seqincrement, "
+ "CASE WHEN seqincrement > 0 AND seqmax = %s THEN NULL "
+ " WHEN seqincrement < 0 AND seqmax = -1 THEN NULL "
+ " ELSE seqmax "
+ "END AS seqmax, "
+ "CASE WHEN seqincrement > 0 AND seqmin = 1 THEN NULL "
+ " WHEN seqincrement < 0 AND seqmin = %s THEN NULL "
+ " ELSE seqmin "
+ "END AS seqmin, "
+ "seqcache, seqcycle "
+ "FROM pg_class c "
+ "JOIN pg_sequence s ON (s.seqrelid = c.oid) "
+ "WHERE relname = ",
+ bufx, bufm);
+ appendStringLiteralAH(query, tbinfo->dobj.name, fout);
+ }
+ else if (fout->remoteVersion >= 80400)
{
appendPQExpBuffer(query,
"SELECT sequence_name, "
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index e13e3991df..5779f0d617 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201612121
+#define CATALOG_VERSION_NO 201612201
#endif
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 40f7576b7b..293985db4e 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -209,6 +209,9 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_oid_index, 2692, on pg_rewrite using btree(oid o
DECLARE_UNIQUE_INDEX(pg_rewrite_rel_rulename_index, 2693, on pg_rewrite using btree(ev_class oid_ops, rulename name_ops));
#define RewriteRelRulenameIndexId 2693
+DECLARE_UNIQUE_INDEX(pg_sequence_seqrelid_index, 5002, on pg_sequence using btree(seqrelid oid_ops));
+#define SequenceRelidIndexId 5002
+
DECLARE_INDEX(pg_shdepend_depender_index, 1232, on pg_shdepend using btree(dbid oid_ops, classid oid_ops, objid oid_ops, objsubid int4_ops));
#define SharedDependDependerIndexId 1232
DECLARE_INDEX(pg_shdepend_reference_index, 1233, on pg_shdepend using btree(refclassid oid_ops, refobjid oid_ops));
diff --git a/src/include/catalog/pg_sequence.h b/src/include/catalog/pg_sequence.h
new file mode 100644
index 0000000000..3bcda6bef1
--- /dev/null
+++ b/src/include/catalog/pg_sequence.h
@@ -0,0 +1,30 @@
+#ifndef PG_SEQUENCE_H
+#define PG_SEQUENCE_H
+
+#include "catalog/genbki.h"
+
+#define SequenceRelationId 2224
+
+CATALOG(pg_sequence,2224) BKI_WITHOUT_OIDS
+{
+ Oid seqrelid;
+ int64 seqstart;
+ int64 seqincrement;
+ int64 seqmax;
+ int64 seqmin;
+ int64 seqcache;
+ bool seqcycle;
+} FormData_pg_sequence;
+
+typedef FormData_pg_sequence *Form_pg_sequence;
+
+#define Natts_pg_sequence 7
+#define Anum_pg_sequence_seqrelid 1
+#define Anum_pg_sequence_seqstart 2
+#define Anum_pg_sequence_seqincrement 3
+#define Anum_pg_sequence_seqmax 4
+#define Anum_pg_sequence_seqmin 5
+#define Anum_pg_sequence_seqcache 6
+#define Anum_pg_sequence_seqcycle 7
+
+#endif /* PG_SEQUENCE_H */
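With this catalog in place, a sequence's static parameters live as one
row in pg_sequence, keyed by the sequence's pg_class OID. For example (a
sketch; the sequence name is hypothetical and the output follows the
defaults established in init_params above):

    CREATE SEQUENCE seq INCREMENT 2 MAXVALUE 100;
    SELECT seqstart, seqincrement, seqmax, seqmin, seqcache, seqcycle
    FROM pg_sequence
    WHERE seqrelid = 'seq'::regclass;
    -- seqstart | seqincrement | seqmax | seqmin | seqcache | seqcycle
    --        1 |            2 |    100 |      1 |        1 | f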
diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h
index 6695bbe4c7..1fd75b216d 100644
--- a/src/include/commands/sequence.h
+++ b/src/include/commands/sequence.h
@@ -22,38 +22,24 @@
#include "storage/relfilenode.h"
-typedef struct FormData_pg_sequence
+typedef struct FormData_pg_sequence_data
{
- NameData sequence_name;
int64 last_value;
- int64 start_value;
- int64 increment_by;
- int64 max_value;
- int64 min_value;
- int64 cache_value;
int64 log_cnt;
- bool is_cycled;
bool is_called;
-} FormData_pg_sequence;
+} FormData_pg_sequence_data;
-typedef FormData_pg_sequence *Form_pg_sequence;
+typedef FormData_pg_sequence_data *Form_pg_sequence_data;
/*
* Columns of a sequence relation
*/
-#define SEQ_COL_NAME 1
-#define SEQ_COL_LASTVAL 2
-#define SEQ_COL_STARTVAL 3
-#define SEQ_COL_INCBY 4
-#define SEQ_COL_MAXVALUE 5
-#define SEQ_COL_MINVALUE 6
-#define SEQ_COL_CACHE 7
-#define SEQ_COL_LOG 8
-#define SEQ_COL_CYCLE 9
-#define SEQ_COL_CALLED 10
+#define SEQ_COL_LASTVAL 1
+#define SEQ_COL_LOG 2
+#define SEQ_COL_CALLED 3
-#define SEQ_COL_FIRSTCOL SEQ_COL_NAME
+#define SEQ_COL_FIRSTCOL SEQ_COL_LASTVAL
#define SEQ_COL_LASTCOL SEQ_COL_CALLED
/* XLOG stuff */
@@ -77,6 +63,7 @@ extern Datum pg_sequence_last_value(PG_FUNCTION_ARGS);
extern ObjectAddress DefineSequence(ParseState *pstate, CreateSeqStmt *stmt);
extern ObjectAddress AlterSequence(ParseState *pstate, AlterSeqStmt *stmt);
+extern void DeleteSequenceTuple(Oid relid);
extern void ResetSequence(Oid seq_relid);
extern void ResetSequenceCaches(void);
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 39fe947d6e..4b7631e763 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -81,6 +81,7 @@ enum SysCacheIdentifier
REPLORIGIDENT,
REPLORIGNAME,
RULERELNAME,
+ SEQRELID,
STATRELATTINH,
TABLESPACEOID,
TRFOID,
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 5314b9c207..e9cfadbc0f 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1623,16 +1623,16 @@ UNION ALL
pg_sequences| SELECT n.nspname AS schemaname,
c.relname AS sequencename,
pg_get_userbyid(c.relowner) AS sequenceowner,
- p.start_value,
- p.minimum_value AS min_value,
- p.maximum_value AS max_value,
- p.increment AS increment_by,
- p.cycle_option AS cycle,
- p.cache_size,
+ s.seqstart AS start_value,
+ s.seqmin AS min_value,
+ s.seqmax AS max_value,
+ s.seqincrement AS increment_by,
+ s.seqcycle AS cycle,
+ s.seqcache AS cache_size,
pg_sequence_last_value((c.oid)::regclass) AS last_value
- FROM (pg_class c
- LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))),
- LATERAL pg_sequence_parameters(c.oid) p(start_value, minimum_value, maximum_value, increment, cycle_option, cache_size)
+ FROM ((pg_sequence s
+ JOIN pg_class c ON ((c.oid = s.seqrelid)))
+ LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
WHERE ((NOT pg_is_other_temp_schema(n.oid)) AND (c.relkind = 'S'::"char"));
pg_settings| SELECT a.name,
a.setting,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index 8fa929a6aa..7ad68c745b 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -128,6 +128,7 @@ pg_range|t
pg_replication_origin|t
pg_rewrite|t
pg_seclabel|t
+pg_sequence|t
pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
diff --git a/src/test/regress/expected/sequence.out b/src/test/regress/expected/sequence.out
index c5413e09f3..a2bdd3002b 100644
--- a/src/test/regress/expected/sequence.out
+++ b/src/test/regress/expected/sequence.out
@@ -173,9 +173,9 @@ DROP SEQUENCE sequence_test;
CREATE SEQUENCE foo_seq;
ALTER TABLE foo_seq RENAME TO foo_seq_new;
SELECT * FROM foo_seq_new;
- sequence_name | last_value | start_value | increment_by | max_value | min_value | cache_value | log_cnt | is_cycled | is_called
----------------+------------+-------------+--------------+---------------------+-----------+-------------+---------+-----------+-----------
- foo_seq | 1 | 1 | 1 | 9223372036854775807 | 1 | 1 | 0 | f | f
+ last_value | log_cnt | is_called
+------------+---------+-----------
+ 1 | 0 | f
(1 row)
SELECT nextval('foo_seq_new');
@@ -191,9 +191,9 @@ SELECT nextval('foo_seq_new');
(1 row)
SELECT * FROM foo_seq_new;
- sequence_name | last_value | start_value | increment_by | max_value | min_value | cache_value | log_cnt | is_cycled | is_called
----------------+------------+-------------+--------------+---------------------+-----------+-------------+---------+-----------+-----------
- foo_seq | 2 | 1 | 1 | 9223372036854775807 | 1 | 1 | 31 | f | t
+ last_value | log_cnt | is_called
+------------+---------+-----------
+ 2 | 31 | t
(1 row)
DROP SEQUENCE foo_seq_new;
@@ -536,3 +536,24 @@ SELECT * FROM information_schema.sequences WHERE sequence_name IN
DROP USER regress_seq_user;
DROP SEQUENCE seq;
+-- cache tests
+CREATE SEQUENCE test_seq1 CACHE 10;
+SELECT nextval('test_seq1');
+ nextval
+---------
+ 1
+(1 row)
+
+SELECT nextval('test_seq1');
+ nextval
+---------
+ 2
+(1 row)
+
+SELECT nextval('test_seq1');
+ nextval
+---------
+ 3
+(1 row)
+
+DROP SEQUENCE test_seq1;
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 79ddbdee4f..8201d77a56 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -86,55 +86,48 @@ SELECT table_name, column_name, is_updatable
FROM information_schema.columns
WHERE table_name LIKE E'r_\\_view%'
ORDER BY table_name, ordinal_position;
- table_name | column_name | is_updatable
-------------+---------------+--------------
- ro_view1 | a | NO
- ro_view1 | b | NO
- ro_view10 | a | NO
- ro_view11 | a | NO
- ro_view11 | b | NO
- ro_view12 | a | NO
- ro_view13 | a | NO
- ro_view13 | b | NO
- ro_view17 | a | NO
- ro_view17 | b | NO
- ro_view18 | a | NO
- ro_view19 | sequence_name | NO
- ro_view19 | last_value | NO
- ro_view19 | start_value | NO
- ro_view19 | increment_by | NO
- ro_view19 | max_value | NO
- ro_view19 | min_value | NO
- ro_view19 | cache_value | NO
- ro_view19 | log_cnt | NO
- ro_view19 | is_cycled | NO
- ro_view19 | is_called | NO
- ro_view2 | a | NO
- ro_view2 | b | NO
- ro_view20 | a | NO
- ro_view20 | b | NO
- ro_view20 | g | NO
- ro_view3 | ?column? | NO
- ro_view4 | count | NO
- ro_view5 | a | NO
- ro_view5 | rank | NO
- ro_view6 | a | NO
- ro_view6 | b | NO
- ro_view7 | a | NO
- ro_view7 | b | NO
- ro_view8 | a | NO
- ro_view8 | b | NO
- ro_view9 | a | NO
- ro_view9 | b | NO
- rw_view14 | ctid | NO
- rw_view14 | a | YES
- rw_view14 | b | YES
- rw_view15 | a | YES
- rw_view15 | upper | NO
- rw_view16 | a | YES
- rw_view16 | b | YES
- rw_view16 | aa | YES
-(46 rows)
+ table_name | column_name | is_updatable
+------------+-------------+--------------
+ ro_view1 | a | NO
+ ro_view1 | b | NO
+ ro_view10 | a | NO
+ ro_view11 | a | NO
+ ro_view11 | b | NO
+ ro_view12 | a | NO
+ ro_view13 | a | NO
+ ro_view13 | b | NO
+ ro_view17 | a | NO
+ ro_view17 | b | NO
+ ro_view18 | a | NO
+ ro_view19 | last_value | NO
+ ro_view19 | log_cnt | NO
+ ro_view19 | is_called | NO
+ ro_view2 | a | NO
+ ro_view2 | b | NO
+ ro_view20 | a | NO
+ ro_view20 | b | NO
+ ro_view20 | g | NO
+ ro_view3 | ?column? | NO
+ ro_view4 | count | NO
+ ro_view5 | a | NO
+ ro_view5 | rank | NO
+ ro_view6 | a | NO
+ ro_view6 | b | NO
+ ro_view7 | a | NO
+ ro_view7 | b | NO
+ ro_view8 | a | NO
+ ro_view8 | b | NO
+ ro_view9 | a | NO
+ ro_view9 | b | NO
+ rw_view14 | ctid | NO
+ rw_view14 | a | YES
+ rw_view14 | b | YES
+ rw_view15 | a | YES
+ rw_view15 | upper | NO
+ rw_view16 | a | YES
+ rw_view16 | b | YES
+ rw_view16 | aa | YES
+(39 rows)
-- Read-only views
DELETE FROM ro_view1;
@@ -327,7 +320,7 @@ DELETE FROM ro_view18;
ERROR: cannot delete from view "ro_view18"
DETAIL: Views that do not select from a single table or view are not automatically updatable.
HINT: To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule.
-UPDATE ro_view19 SET max_value=1000;
+UPDATE ro_view19 SET last_value=1000;
ERROR: cannot update view "ro_view19"
DETAIL: Views that do not select from a single table or view are not automatically updatable.
HINT: To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule.
diff --git a/src/test/regress/sql/sequence.sql b/src/test/regress/sql/sequence.sql
index e2b6b63393..a79330e780 100644
--- a/src/test/regress/sql/sequence.sql
+++ b/src/test/regress/sql/sequence.sql
@@ -273,3 +273,11 @@ SELECT * FROM information_schema.sequences WHERE sequence_name IN
DROP USER regress_seq_user;
DROP SEQUENCE seq;
+
+-- cache tests
+CREATE SEQUENCE test_seq1 CACHE 10;
+SELECT nextval('test_seq1');
+SELECT nextval('test_seq1');
+SELECT nextval('test_seq1');
+
+DROP SEQUENCE test_seq1;
diff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql
index 03c3f9d35e..f008479541 100644
--- a/src/test/regress/sql/updatable_views.sql
+++ b/src/test/regress/sql/updatable_views.sql
@@ -95,7 +95,7 @@ DELETE FROM rw_view16 WHERE a=-3; -- should be OK
-- Read-only views
INSERT INTO ro_view17 VALUES (3, 'ROW 3');
DELETE FROM ro_view18;
-UPDATE ro_view19 SET max_value=1000;
+UPDATE ro_view19 SET last_value=1000;
UPDATE ro_view20 SET b=upper(b);
DROP TABLE base_tbl CASCADE;
--
cgit v1.2.3
From f3b421da5f4addc95812b9db05a24972b8fd9739 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 21 Dec 2016 12:00:00 -0500
Subject: Reorder pg_sequence columns to avoid alignment issue
On AIX, doubles are aligned at 4 bytes, but int64 is aligned at 8 bytes.
Our code assumes that doubles have alignment that can also be applied to
int64, but that fails in this case. One effect is that
heap_form_tuple() writes tuples in a different layout than
Form_pg_sequence expects.
Rather than rewrite the whole alignment code, work around the issue by
reordering the columns in pg_sequence so that the first int64 column
naturally comes out at an 8-byte boundary.
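Since the OID is 4 bytes and the bool fits in the padding that follows
it, the first int64 column (seqstart) now begins at offset 8. The
reordered layout can be inspected from SQL (a sketch):

    SELECT attnum, attname
    FROM pg_attribute
    WHERE attrelid = 'pg_sequence'::regclass AND attnum > 0
    ORDER BY attnum;
    -- 1 seqrelid, 2 seqcycle, 3 seqstart, 4 seqincrement,
    -- 5 seqmax, 6 seqmin, 7 seqcache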
---
doc/src/sgml/catalogs.sgml | 14 +++++++-------
src/backend/commands/sequence.c | 4 ++--
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_sequence.h | 14 +++++++-------
4 files changed, 17 insertions(+), 17 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 7a7bbde390..493050618d 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -5627,6 +5627,13 @@
The OID of the pg_class> entry for this sequence
+
+ seqcycle
+ bool
+
+ Whether the sequence cycles
+
+
seqstartint8
@@ -5661,13 +5668,6 @@
Cache size of the sequence
-
-
- seqcycle
- bool
-
- Whether the sequence cycles
-
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index cdd32bc17e..668d82771a 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -227,12 +227,12 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
memset(pgs_nulls, 0, sizeof(pgs_nulls));
pgs_values[Anum_pg_sequence_seqrelid - 1] = ObjectIdGetDatum(seqoid);
+ pgs_values[Anum_pg_sequence_seqcycle - 1] = BoolGetDatum(seqform.seqcycle);
pgs_values[Anum_pg_sequence_seqstart - 1] = Int64GetDatumFast(seqform.seqstart);
pgs_values[Anum_pg_sequence_seqincrement - 1] = Int64GetDatumFast(seqform.seqincrement);
pgs_values[Anum_pg_sequence_seqmax - 1] = Int64GetDatumFast(seqform.seqmax);
pgs_values[Anum_pg_sequence_seqmin - 1] = Int64GetDatumFast(seqform.seqmin);
pgs_values[Anum_pg_sequence_seqcache - 1] = Int64GetDatumFast(seqform.seqcache);
- pgs_values[Anum_pg_sequence_seqcycle - 1] = BoolGetDatum(seqform.seqcycle);
tuple = heap_form_tuple(tupDesc, pgs_values, pgs_nulls);
simple_heap_insert(rel, tuple);
@@ -622,11 +622,11 @@ nextval_internal(Oid relid)
if (!HeapTupleIsValid(pgstuple))
elog(ERROR, "cache lookup failed for sequence %u", relid);
pgsform = (Form_pg_sequence) GETSTRUCT(pgstuple);
+ cycle = pgsform->seqcycle;
incby = pgsform->seqincrement;
maxv = pgsform->seqmax;
minv = pgsform->seqmin;
cache = pgsform->seqcache;
- cycle = pgsform->seqcycle;
ReleaseSysCache(pgstuple);
/* lock page's buffer and read tuple */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 5779f0d617..7d15189ead 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201612201
+#define CATALOG_VERSION_NO 201612202
#endif
diff --git a/src/include/catalog/pg_sequence.h b/src/include/catalog/pg_sequence.h
index 3bcda6bef1..350b286e45 100644
--- a/src/include/catalog/pg_sequence.h
+++ b/src/include/catalog/pg_sequence.h
@@ -8,23 +8,23 @@
CATALOG(pg_sequence,2224) BKI_WITHOUT_OIDS
{
Oid seqrelid;
+ bool seqcycle;
int64 seqstart;
int64 seqincrement;
int64 seqmax;
int64 seqmin;
int64 seqcache;
- bool seqcycle;
} FormData_pg_sequence;
typedef FormData_pg_sequence *Form_pg_sequence;
#define Natts_pg_sequence 7
#define Anum_pg_sequence_seqrelid 1
-#define Anum_pg_sequence_seqstart 2
-#define Anum_pg_sequence_seqincrement 3
-#define Anum_pg_sequence_seqmax 4
-#define Anum_pg_sequence_seqmin 5
-#define Anum_pg_sequence_seqcache 6
-#define Anum_pg_sequence_seqcycle 7
+#define Anum_pg_sequence_seqcycle 2
+#define Anum_pg_sequence_seqstart 3
+#define Anum_pg_sequence_seqincrement 4
+#define Anum_pg_sequence_seqmax 5
+#define Anum_pg_sequence_seqmin 6
+#define Anum_pg_sequence_seqcache 7
#endif /* PG_SEQUENCE_H */
--
cgit v1.2.3
From 2d1018ca56f5ddaf0bfb5b4d0133283f3e823301 Mon Sep 17 00:00:00 2001
From: Stephen Frost
Date: Wed, 21 Dec 2016 15:03:32 -0500
Subject: Improve ALTER TABLE documentation
The ALTER TABLE documentation wasn't terribly clear about which commands
can be combined and what such a combination means.
In particular, SET TABLESPACE *can* be combined with other commands when
it operates on a single table, but not when multiple tables are being
moved with ALL IN TABLESPACE. Further, the combined actions are applied
together, though not truly in 'parallel', at least today.
Pointed out by: Amit Langote
Improved wording from Tom.
Back-patch to 9.4, where the ALL IN TABLESPACE option was added.
Discussion: https://www.postgresql.org/message-id/14c535b4-13ef-0590-1b98-76af355a0763%40lab.ntt.co.jp
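For example, on a single table several forms, including SET TABLESPACE,
can be listed in one statement (a sketch; the table, column, and
tablespace names are hypothetical):

    ALTER TABLE measurements
        ADD COLUMN note text,
        ALTER COLUMN reading TYPE double precision,
        SET TABLESPACE fast_ssd;

By contrast, ALTER TABLE ALL IN TABLESPACE ... SET TABLESPACE accepts no
other actions in the same statement.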
---
doc/src/sgml/ref/alter_table.sgml | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 8ea6624147..44f1ec644f 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -767,11 +767,11 @@ ALTER TABLE [ IF EXISTS ] name
- All the actions except RENAME,
- SET TABLESPACE, SET SCHEMA,
+ All the forms of ALTER TABLE that act on a single table, except
+ RENAME, SET SCHEMA,
ATTACH PARTITION, and
DETACH PARTITION can be combined into
- a list of multiple alterations to apply in parallel. For example, it
+ a list of multiple alterations to be applied together. For example, it
is possible to add several columns and/or alter the type of several
columns in a single command. This is particularly useful with large
tables, since only one pass over the table need be made.
--
cgit v1.2.3
From 89fcea1ace40bc025beea2758a80bcd56a319a6f Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 21 Dec 2016 15:18:25 -0500
Subject: Fix strange behavior (and possible crashes) in full text phrase
search.
In an attempt to simplify the tsquery matching engine, the original
phrase search patch invented rewrite rules that would rearrange a
tsquery so that no AND/OR/NOT operator appeared below a PHRASE operator.
But this approach had numerous problems. The rearrangement step was
missed by ts_rewrite (and perhaps other places), allowing tsqueries
to be created that would cause Assert failures or perhaps crashes at
execution, as reported by Andreas Seltenreich. The rewrite rules
effectively defined semantics for operators underneath PHRASE that were
buggy, or at least unintuitive. And because rewriting was done in
tsqueryin() rather than at execution, the rearrangement was user-visible,
which is not very desirable --- for example, it might cause unexpected
matches or failures to match in ts_rewrite.
As a somewhat independent problem, the behavior of nested PHRASE operators
was only sane for left-deep trees; queries like "x <-> (y <-> z)" did not
behave intuitively at all.
To fix, get rid of the rewrite logic altogether, and instead teach the
tsquery execution engine to manage AND/OR/NOT below a PHRASE operator
by explicitly computing the match location(s) and match widths for these
operators.
This requires introducing some additional fields into the publicly visible
ExecPhraseData struct; but since there's no way for third-party code to
pass such a struct to TS_phrase_execute, it shouldn't create an ABI problem
as long as we don't move the offsets of the existing fields.
Another related problem was that index searches supposed that "!x <-> y"
could be lossily approximated as "!x & y", which isn't correct because
the latter will reject, say, "x q y" which the query itself accepts.
This required some tweaking in TS_execute_ternary along with the main
tsquery engine.
Back-patch to 9.6 where phrase operators were introduced. While this
could be argued to change behavior more than we'd like in a stable branch,
we have to do something about the crash hazards and index-vs-seqscan
inconsistency, and it doesn't seem desirable to let the unintuitive
behaviors induced by the rewriting implementation stand as precedent.
Discussion: https://postgr.es/m/28215.1481999808@sss.pgh.pa.us
Discussion: https://postgr.es/m/26706.1482087250@sss.pgh.pa.us
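To see why "!x & y" is not a safe approximation of "!x <-> y" (a sketch
using the built-in simple configuration):

    SELECT to_tsvector('simple', 'x q y') @@ '!x <-> y'::tsquery;  -- true
    SELECT to_tsvector('simple', 'x q y') @@ '!x & y'::tsquery;    -- false

The phrase query accepts 'x q y' because its y is not immediately after
an x, but the AND form rejects any document containing x at all; an index
relying on the AND approximation would therefore miss valid matches.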
---
doc/src/sgml/datatype.sgml | 8 -
doc/src/sgml/textsearch.sgml | 26 +-
src/backend/utils/adt/tsginidx.c | 28 +-
src/backend/utils/adt/tsgistidx.c | 17 +-
src/backend/utils/adt/tsquery.c | 25 +-
src/backend/utils/adt/tsquery_cleanup.c | 242 +---------------
src/backend/utils/adt/tsquery_op.c | 4 +-
src/backend/utils/adt/tsvector_op.c | 477 ++++++++++++++++++++++++--------
src/include/tsearch/ts_utils.h | 31 ++-
src/test/regress/expected/tsdicts.out | 12 +-
src/test/regress/expected/tsearch.out | 40 ++-
src/test/regress/expected/tstypes.out | 298 ++++++++++----------
src/test/regress/sql/tsearch.sql | 6 +
src/test/regress/sql/tstypes.sql | 56 ++--
14 files changed, 671 insertions(+), 599 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 67d0c349e0..464ce83d30 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -3959,15 +3959,7 @@ SELECT 'fat & rat & ! cat'::tsquery;
tsquery
------------------------
'fat' & 'rat' & !'cat'
-
-SELECT '(fat | rat) <-> cat'::tsquery;
- tsquery
------------------------------------
- 'fat' <-> 'cat' | 'rat' <-> 'cat'
-
- The last example demonstrates that tsquery sometimes
- rearranges nested operators into a logically equivalent formulation.
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 2da75955d0..67e4901c92 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -264,7 +264,7 @@ SELECT 'fat & cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t
text, any more than a tsvector is. A tsquery
contains search terms, which must be already-normalized lexemes, and
may combine multiple terms using AND, OR, NOT, and FOLLOWED BY operators.
- (For details see .) There are
+ (For syntax details see .) There are
functions to_tsquery>, plainto_tsquery>,
and phraseto_tsquery>
that are helpful in converting user-written text into a proper
@@ -323,6 +323,8 @@ text @@ text
at least one of its arguments must appear, while the !> (NOT)
operator specifies that its argument must not> appear in
order to have a match.
+ For example, the query fat & ! rat> matches documents that
+ contain fat> but not rat>.
@@ -377,6 +379,28 @@ SELECT phraseto_tsquery('the cats ate the rats');
then &, then <->,
and ! most tightly.
+
+
+ It's worth noticing that the AND/OR/NOT operators mean something subtly
+ different when they are within the arguments of a FOLLOWED BY operator
+ than when they are not, because within FOLLOWED BY the exact position of
+ the match is significant. For example, normally !x> matches
+ only documents that do not contain x> anywhere.
+ But !x <-> y> matches y> if it is not
+ immediately after an x>; an occurrence of x>
+ elsewhere in the document does not prevent a match. Another example is
+ that x & y> normally only requires that x>
+ and y> both appear somewhere in the document, but
+ (x & y) <-> z> requires x>
+ and y> to match at the same place, immediately before
+ a z>. Thus this query behaves differently from
+ x <-> z & y <-> z>, which will match a
+ document containing two separate sequences x z> and
+ y z>. (This specific query is useless as written,
+ since x> and y> could not match at the same place;
+ but with more complex situations such as prefix-match patterns, a query
+ of this form could be useful.)
+
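The last point above can be checked directly (a sketch using the built-in
simple configuration):

    SELECT to_tsvector('simple', 'x z y z') @@ 'x <-> z & y <-> z'::tsquery;  -- true
    SELECT to_tsvector('simple', 'x z y z') @@ '(x & y) <-> z'::tsquery;      -- false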
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index efc111e379..3e0a44459a 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -212,7 +212,7 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
* Evaluate tsquery boolean expression using ternary logic.
*/
static GinTernaryValue
-TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
+TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
{
GinTernaryValue val1,
val2,
@@ -230,7 +230,10 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
switch (curitem->qoperator.oper)
{
case OP_NOT:
- result = TS_execute_ternary(gcv, curitem + 1);
+ /* In phrase search, always return MAYBE since we lack positions */
+ if (in_phrase)
+ return GIN_MAYBE;
+ result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (result == GIN_MAYBE)
return result;
return !result;
@@ -238,17 +241,21 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
case OP_PHRASE:
/*
- * GIN doesn't contain any information about positions, treat
+ * GIN doesn't contain any information about positions, so treat
* OP_PHRASE as OP_AND with recheck requirement
*/
- *gcv->need_recheck = true;
+ *(gcv->need_recheck) = true;
+ /* Pass down in_phrase == true in case there's a NOT below */
+ in_phrase = true;
+
/* FALL THRU */
case OP_AND:
- val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
+ val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
+ in_phrase);
if (val1 == GIN_FALSE)
return GIN_FALSE;
- val2 = TS_execute_ternary(gcv, curitem + 1);
+ val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_FALSE)
return GIN_FALSE;
if (val1 == GIN_TRUE && val2 == GIN_TRUE)
@@ -257,10 +264,11 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
return GIN_MAYBE;
case OP_OR:
- val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
+ val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
+ in_phrase);
if (val1 == GIN_TRUE)
return GIN_TRUE;
- val2 = TS_execute_ternary(gcv, curitem + 1);
+ val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_TRUE)
return GIN_TRUE;
if (val1 == GIN_FALSE && val2 == GIN_FALSE)
@@ -307,7 +315,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
res = TS_execute(GETQUERY(query),
&gcv,
- TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
+ TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
checkcondition_gin);
}
@@ -343,7 +351,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck;
- res = TS_execute_ternary(&gcv, GETQUERY(query));
+ res = TS_execute_ternary(&gcv, GETQUERY(query), false);
if (res == GIN_TRUE && recheck)
res = GIN_MAYBE;
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 6cdfb13f6d..a4c2bb9cec 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -359,12 +359,11 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
if (ISALLTRUE(key))
PG_RETURN_BOOL(true);
- PG_RETURN_BOOL(TS_execute(
- GETQUERY(query),
+ /* since signature is lossy, cannot specify CALC_NOT here */
+ PG_RETURN_BOOL(TS_execute(GETQUERY(query),
(void *) GETSIGN(key),
- TS_EXEC_PHRASE_AS_AND,
- checkcondition_bit
- ));
+ TS_EXEC_PHRASE_NO_POS,
+ checkcondition_bit));
}
else
{ /* only leaf pages */
@@ -372,12 +371,10 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
chkval.arrb = GETARR(key);
chkval.arre = chkval.arrb + ARRNELEM(key);
- PG_RETURN_BOOL(TS_execute(
- GETQUERY(query),
+ PG_RETURN_BOOL(TS_execute(GETQUERY(query),
(void *) &chkval,
- TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
- checkcondition_arr
- ));
+ TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT,
+ checkcondition_arr));
}
}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 3d11a1c208..f0bd52877f 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -557,13 +557,11 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
curitem->oper == OP_OR ||
curitem->oper == OP_PHRASE);
- if (curitem->oper == OP_PHRASE)
- *needcleanup = true; /* push OP_PHRASE down later */
-
(*pos)++;
/* process RIGHT argument */
findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+
curitem->left = *pos - tmp; /* set LEFT arg's offset */
/* process LEFT argument */
@@ -574,8 +572,9 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
/*
- * Fills in the left-fields previously left unfilled. The input
- * QueryItems must be in polish (prefix) notation.
+ * Fill in the left-fields previously left unfilled.
+ * The input QueryItems must be in polish (prefix) notation.
+ * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
*/
static void
findoprnd(QueryItem *ptr, int size, bool *needcleanup)
@@ -687,15 +686,17 @@ parse_tsquery(char *buf,
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
- /* Set left operand pointers for every operator. */
+ /*
+ * Set left operand pointers for every operator. While we're at it,
+ * detect whether there are any QI_VALSTOP nodes.
+ */
findoprnd(ptr, query->size, &needcleanup);
/*
- * QI_VALSTOP nodes should be cleaned and OP_PHRASE should be pushed
- * down
+ * If there are QI_VALSTOP nodes, delete them and simplify the tree.
*/
if (needcleanup)
- return cleanup_fakeval_and_phrase(query);
+ query = cleanup_tsquery_stopwords(query);
return query;
}
@@ -1088,6 +1089,9 @@ tsqueryrecv(PG_FUNCTION_ARGS)
*/
findoprnd(item, size, &needcleanup);
+ /* Can't have found any QI_VALSTOP nodes */
+ Assert(!needcleanup);
+
/* Copy operands to output struct */
for (i = 0; i < size; i++)
{
@@ -1105,9 +1109,6 @@ tsqueryrecv(PG_FUNCTION_ARGS)
SET_VARSIZE(query, len + datalen);
- if (needcleanup)
- PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query));
-
PG_RETURN_TSQUERY(query);
}
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
index 330664da63..c10c7ef0aa 100644
--- a/src/backend/utils/adt/tsquery_cleanup.c
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -25,19 +25,6 @@ typedef struct NODE
QueryItem *valnode;
} NODE;
-/*
- * To simplify walking on query tree and pushing down of phrase operator
- * we define some fake priority here: phrase operator has highest priority
- * of any other operators (and we believe here that OP_PHRASE is a highest
- * code of operations) and value node has ever highest priority.
- * Priority values of other operations don't matter until they are less than
- * phrase operator and value node.
- */
-#define VALUE_PRIORITY (OP_COUNT + 1)
-#define NODE_PRIORITY(x) \
- ( ((x)->valnode->qoperator.type == QI_OPR) ? \
- (x)->valnode->qoperator.oper : VALUE_PRIORITY )
-
/*
* make query tree from plain view of query
*/
@@ -368,227 +355,6 @@ clean_stopword_intree(NODE *node, int *ladd, int *radd)
return node;
}
-static NODE *
-copyNODE(NODE *node)
-{
- NODE *cnode = palloc(sizeof(NODE));
-
- /* since this function recurses, it could be driven to stack overflow. */
- check_stack_depth();
-
- cnode->valnode = palloc(sizeof(QueryItem));
- *(cnode->valnode) = *(node->valnode);
-
- if (node->valnode->type == QI_OPR)
- {
- cnode->right = copyNODE(node->right);
- if (node->valnode->qoperator.oper != OP_NOT)
- cnode->left = copyNODE(node->left);
- }
-
- return cnode;
-}
-
-static NODE *
-makeNODE(int8 op, NODE *left, NODE *right)
-{
- NODE *node = palloc(sizeof(NODE));
-
- /* zeroing allocation to prevent difference in unused bytes */
- node->valnode = palloc0(sizeof(QueryItem));
-
- node->valnode->qoperator.type = QI_OPR;
- node->valnode->qoperator.oper = op;
-
- node->left = left;
- node->right = right;
-
- return node;
-}
-
-/*
- * Move operation with high priority to the leaves. This guarantees
- * that the phrase operator will be near the bottom of the tree.
- * An idea behind is do not store position of lexemes during execution
- * of ordinary operations (AND, OR, NOT) because it could be expensive.
- * Actual transformation will be performed only on subtrees under the
- * <-> () operation since it's needed solely for the phrase operator.
- *
- * Rules:
- * a <-> (b | c) => (a <-> b) | (a <-> c)
- * (a | b) <-> c => (a <-> c) | (b <-> c)
- * a <-> !b => a & !(a <-> b)
- * !a <-> b => b & !(a <-> b)
- *
- * Warnings for readers:
- * a <-> b != b <-> a
- *
- * a (b c) != (a b) c since the phrase lengths are:
- * n 2n-1
- */
-static NODE *
-normalize_phrase_tree(NODE *node)
-{
- /* there should be no stop words at this point */
- Assert(node->valnode->type != QI_VALSTOP);
-
- if (node->valnode->type == QI_VAL)
- return node;
-
- /* since this function recurses, it could be driven to stack overflow. */
- check_stack_depth();
-
- Assert(node->valnode->type == QI_OPR);
-
- if (node->valnode->qoperator.oper == OP_NOT)
- {
- NODE *orignode = node;
-
- /* eliminate NOT sequence */
- while (node->valnode->type == QI_OPR &&
- node->valnode->qoperator.oper == node->right->valnode->qoperator.oper)
- {
- node = node->right->right;
- }
-
- if (orignode != node)
- /* current node isn't checked yet */
- node = normalize_phrase_tree(node);
- else
- node->right = normalize_phrase_tree(node->right);
- }
- else if (node->valnode->qoperator.oper == OP_PHRASE)
- {
- int16 distance;
- NODE *X;
-
- node->left = normalize_phrase_tree(node->left);
- node->right = normalize_phrase_tree(node->right);
-
- /*
- * if subtree contains only nodes with higher "priority" then we are
- * done. See comment near NODE_PRIORITY()
- */
- if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
- NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
- return node;
-
- /*
- * We can't swap left-right and works only with left child because of
- * a <-> b != b <-> a
- */
-
- distance = node->valnode->qoperator.distance;
-
- if (node->right->valnode->type == QI_OPR)
- {
- switch (node->right->valnode->qoperator.oper)
- {
- case OP_AND:
- /* a <-> (b & c) => (a <-> b) & (a <-> c) */
- node = makeNODE(OP_AND,
- makeNODE(OP_PHRASE,
- node->left,
- node->right->left),
- makeNODE(OP_PHRASE,
- copyNODE(node->left),
- node->right->right));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_OR:
- /* a <-> (b | c) => (a <-> b) | (a <-> c) */
- node = makeNODE(OP_OR,
- makeNODE(OP_PHRASE,
- node->left,
- node->right->left),
- makeNODE(OP_PHRASE,
- copyNODE(node->left),
- node->right->right));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_NOT:
- /* a <-> !b => a & !(a <-> b) */
- X = node->right;
- node->right = node->right->right;
- X->right = node;
- node = makeNODE(OP_AND,
- copyNODE(node->left),
- X);
- break;
- case OP_PHRASE:
- /* no-op */
- break;
- default:
- elog(ERROR, "Wrong type of tsquery node: %d",
- node->right->valnode->qoperator.oper);
- }
- }
-
- if (node->left->valnode->type == QI_OPR &&
- node->valnode->qoperator.oper == OP_PHRASE)
- {
- /*
- * if the node is still OP_PHRASE, check the left subtree,
- * otherwise the whole node will be transformed later.
- */
- switch (node->left->valnode->qoperator.oper)
- {
- case OP_AND:
- /* (a & b) <-> c => (a <-> c) & (b <-> c) */
- node = makeNODE(OP_AND,
- makeNODE(OP_PHRASE,
- node->left->left,
- node->right),
- makeNODE(OP_PHRASE,
- node->left->right,
- copyNODE(node->right)));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_OR:
- /* (a | b) <-> c => (a <-> c) | (b <-> c) */
- node = makeNODE(OP_OR,
- makeNODE(OP_PHRASE,
- node->left->left,
- node->right),
- makeNODE(OP_PHRASE,
- node->left->right,
- copyNODE(node->right)));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_NOT:
- /* !a <-> b => b & !(a <-> b) */
- X = node->left;
- node->left = node->left->right;
- X->right = node;
- node = makeNODE(OP_AND,
- X,
- copyNODE(node->right));
- break;
- case OP_PHRASE:
- /* no-op */
- break;
- default:
- elog(ERROR, "Wrong type of tsquery node: %d",
- node->left->valnode->qoperator.oper);
- }
- }
-
- /* continue transformation */
- node = normalize_phrase_tree(node);
- }
- else /* AND or OR */
- {
- node->left = normalize_phrase_tree(node->left);
- node->right = normalize_phrase_tree(node->right);
- }
-
- return node;
-}
-
/*
* Number of elements in query tree
*/
@@ -613,8 +379,11 @@ calcstrlen(NODE *node)
return size;
}
+/*
+ * Remove QI_VALSTOP (stopword) nodes from TSQuery.
+ */
TSQuery
-cleanup_fakeval_and_phrase(TSQuery in)
+cleanup_tsquery_stopwords(TSQuery in)
{
int32 len,
lenstr,
@@ -642,9 +411,6 @@ cleanup_fakeval_and_phrase(TSQuery in)
return out;
}
- /* push OP_PHRASE nodes down */
- root = normalize_phrase_tree(root);
-
/*
* Build TSQuery from plain view
*/
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
index a574b4b257..8f90ce99e0 100644
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -104,7 +104,7 @@ tsquery_or(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
- PG_RETURN_POINTER(query);
+ PG_RETURN_TSQUERY(query);
}
Datum
@@ -140,7 +140,7 @@ tsquery_phrase_distance(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
- PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query));
+ PG_RETURN_TSQUERY(query);
}
Datum
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 36cc10c901..01c721f835 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -11,9 +11,10 @@
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
+#include <limits.h>
+
#include "access/htup_details.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
@@ -1404,148 +1405,395 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
return res;
}
+/*
+ * Compute output position list for a tsquery operator in phrase mode.
+ *
+ * Merge the position lists in Ldata and Rdata as specified by "emit",
+ * returning the result list into *data. The input position lists must be
+ * sorted and unique, and the output will be as well.
+ *
+ * data: pointer to initially-all-zeroes output struct, or NULL
+ * Ldata, Rdata: input position lists
+ * emit: bitmask of TSPO_XXX flags
+ * Loffset: offset to be added to Ldata positions before comparing/outputting
+ * Roffset: offset to be added to Rdata positions before comparing/outputting
+ * max_npos: maximum possible required size of output position array
+ *
+ * Loffset and Roffset should not be negative, else we risk trying to output
+ * negative positions, which won't fit into WordEntryPos.
+ *
+ * Returns true if any positions were emitted to *data; or if data is NULL,
+ * returns true if any positions would have been emitted.
+ */
+#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
+#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
+#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
+
+static bool
+TS_phrase_output(ExecPhraseData *data,
+ ExecPhraseData *Ldata,
+ ExecPhraseData *Rdata,
+ int emit,
+ int Loffset,
+ int Roffset,
+ int max_npos)
+{
+ int Lindex,
+ Rindex;
+
+ /* Loop until both inputs are exhausted */
+ Lindex = Rindex = 0;
+ while (Lindex < Ldata->npos || Rindex < Rdata->npos)
+ {
+ int Lpos,
+ Rpos;
+ int output_pos = 0;
+
+ /*
+ * Fetch current values to compare. WEP_GETPOS() is needed because
+ * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
+ */
+ if (Lindex < Ldata->npos)
+ Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
+ else
+ {
+ /* L array exhausted, so we're done if R_ONLY isn't set */
+ if (!(emit & TSPO_R_ONLY))
+ break;
+ Lpos = INT_MAX;
+ }
+ if (Rindex < Rdata->npos)
+ Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
+ else
+ {
+ /* R array exhausted, so we're done if L_ONLY isn't set */
+ if (!(emit & TSPO_L_ONLY))
+ break;
+ Rpos = INT_MAX;
+ }
+
+ /* Merge-join the two input lists */
+ if (Lpos < Rpos)
+ {
+ /* Lpos is not matched in Rdata, should we output it? */
+ if (emit & TSPO_L_ONLY)
+ output_pos = Lpos;
+ Lindex++;
+ }
+ else if (Lpos == Rpos)
+ {
+ /* Lpos and Rpos match ... should we output it? */
+ if (emit & TSPO_BOTH)
+ output_pos = Rpos;
+ Lindex++;
+ Rindex++;
+ }
+ else /* Lpos > Rpos */
+ {
+ /* Rpos is not matched in Ldata, should we output it? */
+ if (emit & TSPO_R_ONLY)
+ output_pos = Rpos;
+ Rindex++;
+ }
+
+ if (output_pos > 0)
+ {
+ if (data)
+ {
+ /* Store position, first allocating output array if needed */
+ if (data->pos == NULL)
+ {
+ data->pos = (WordEntryPos *)
+ palloc(max_npos * sizeof(WordEntryPos));
+ data->allocated = true;
+ }
+ data->pos[data->npos++] = output_pos;
+ }
+ else
+ {
+ /*
+ * Exact positions not needed, so return true as soon as we
+ * know there is at least one.
+ */
+ return true;
+ }
+ }
+ }
+
+ if (data && data->npos > 0)
+ {
+ /* Let's assert we didn't overrun the array */
+ Assert(data->npos <= max_npos);
+ return true;
+ }
+ return false;
+}
+
/*
* Execute tsquery at or below an OP_PHRASE operator.
*
- * This handles the recursion at levels where we need to care about
- * match locations. In addition to the same arguments used for TS_execute,
- * the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
- * be filled with lexeme match positions on success. data == NULL if no
- * match data need be returned. (In practice, outside callers pass NULL,
- * and only the internal recursion cases pass a data pointer.)
+ * This handles tsquery execution at recursion levels where we need to care
+ * about match locations.
+ *
+ * In addition to the same arguments used for TS_execute, the caller may pass
+ * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
+ * match position info on success. data == NULL if no position data need be
+ * returned. (In practice, outside callers pass NULL, and only the internal
+ * recursion cases pass a data pointer.)
+ * Note: the function assumes data != NULL for operators other than OP_PHRASE.
+ * This is OK because an outside call always starts from an OP_PHRASE node.
+ *
+ * The detailed semantics of the match data, given that the function returned
+ * "true" (successful match, or possible match), are:
+ *
+ * npos > 0, negate = false:
+ * query is matched at specified position(s) (and only those positions)
+ * npos > 0, negate = true:
+ * query is matched at all positions *except* specified position(s)
+ * npos = 0, negate = false:
+ * query is possibly matched, matching position(s) are unknown
+ * (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
+ * npos = 0, negate = true:
+ * query is matched at all positions
+ *
+ * Successful matches also return a "width" value which is the match width in
+ * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
+ * and is the sum of the phrase operator distances for phrase matches. Note
+ * that when width > 0, the listed positions represent the ends of matches not
+ * the starts. (This unintuitive rule is needed to avoid possibly generating
+ * negative positions, which wouldn't fit into the WordEntryPos arrays.)
+ *
+ * When the function returns "false" (no match), it must return npos = 0,
+ * negate = false (which is the state initialized by the caller); but the
+ * "width" output in such cases is undefined.
*/
static bool
TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
- ExecPhraseData *data,
- TSExecuteCallback chkcond)
+ TSExecuteCallback chkcond,
+ ExecPhraseData *data)
{
+ ExecPhraseData Ldata,
+ Rdata;
+ bool lmatch,
+ rmatch;
+ int Loffset,
+ Roffset,
+ maxwidth;
+
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
- {
return chkcond(arg, (QueryOperand *) curitem, data);
- }
- else
+
+ switch (curitem->qoperator.oper)
{
- ExecPhraseData Ldata = {0, false, NULL},
- Rdata = {0, false, NULL};
- WordEntryPos *Lpos,
- *LposStart,
- *Rpos,
- *pos_iter = NULL;
+ case OP_NOT:
- Assert(curitem->qoperator.oper == OP_PHRASE);
+ /*
+ * Because a "true" result with no specific positions is taken as
+ * uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
+ * If it's a false positive, the right things happen anyway.
+ *
+ * Also, we need not touch data->width, since a NOT operation does
+ * not change the match width.
+ */
+ if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
+ {
+ if (data->npos > 0)
+ {
+ /* we have some positions, invert negate flag */
+ data->negate = !data->negate;
+ return true;
+ }
+ else if (data->negate)
+ {
+ /* change "match everywhere" to "match nowhere" */
+ data->negate = false;
+ return false;
+ }
+ /* match positions are, and remain, uncertain */
+ return true;
+ }
+ else
+ {
+ /* change "match nowhere" to "match everywhere" */
+ Assert(data->npos == 0 && !data->negate);
+ data->negate = true;
+ return true;
+ }
- if (!TS_phrase_execute(curitem + curitem->qoperator.left,
- arg, flags, &Ldata, chkcond))
- return false;
+ case OP_PHRASE:
+ case OP_AND:
+ memset(&Ldata, 0, sizeof(Ldata));
+ memset(&Rdata, 0, sizeof(Rdata));
- if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
- return false;
+ if (!TS_phrase_execute(curitem + curitem->qoperator.left,
+ arg, flags, chkcond, &Ldata))
+ return false;
- /*
- * If either operand has no position information, then we normally
- * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
- * return true, treating OP_PHRASE as if it were OP_AND.
- */
- if (Ldata.npos == 0 || Rdata.npos == 0)
- return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
+ if (!TS_phrase_execute(curitem + 1,
+ arg, flags, chkcond, &Rdata))
+ return false;
- /*
- * Prepare output position array if needed.
- */
- if (data)
- {
/*
- * We can recycle the righthand operand's result array if it was
- * palloc'd, else must allocate our own. The number of matches
- * couldn't be more than the smaller of the two operands' matches.
+ * If either operand has no position information, then we can't
+ * return position data, only a "possible match" result. "Possible
+ * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
+ * is set, otherwise return false.
*/
- if (!Rdata.allocated)
- data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
- else
- data->pos = Rdata.pos;
+ if ((Ldata.npos == 0 && !Ldata.negate) ||
+ (Rdata.npos == 0 && !Rdata.negate))
+ return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
- data->allocated = true;
- data->npos = 0;
- pos_iter = data->pos;
- }
+ if (curitem->qoperator.oper == OP_PHRASE)
+ {
+ /*
+ * Compute Loffset and Roffset suitable for phrase match, and
+ * compute overall width of whole phrase match.
+ */
+ Loffset = curitem->qoperator.distance + Rdata.width;
+ Roffset = 0;
+ if (data)
+ data->width = curitem->qoperator.distance +
+ Ldata.width + Rdata.width;
+ }
+ else
+ {
+ /*
+ * For OP_AND, set output width and alignment like OP_OR (see
+ * comment below)
+ */
+ maxwidth = Max(Ldata.width, Rdata.width);
+ Loffset = maxwidth - Ldata.width;
+ Roffset = maxwidth - Rdata.width;
+ if (data)
+ data->width = maxwidth;
+ }
- /*
- * Find matches by distance. WEP_GETPOS() is needed because
- * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
- *
- * Note that the output positions are those of the matching RIGHT
- * operands.
- */
- Rpos = Rdata.pos;
- LposStart = Ldata.pos;
- while (Rpos < Rdata.pos + Rdata.npos)
- {
- /*
- * We need to check all possible distances, so reset Lpos to
- * guaranteed not yet satisfied position.
- */
- Lpos = LposStart;
- while (Lpos < Ldata.pos + Ldata.npos)
+ if (Ldata.negate && Rdata.negate)
{
- if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) ==
- curitem->qoperator.distance)
- {
- /* MATCH! */
- if (data)
- {
- /* Store position for upper phrase operator */
- *pos_iter = WEP_GETPOS(*Rpos);
- pos_iter++;
-
- /*
- * Set left start position to next, because current
- * one could not satisfy distance for any other right
- * position
- */
- LposStart = Lpos + 1;
- break;
- }
- else
- {
- /*
- * We are at the root of the phrase tree and hence we
- * don't have to identify all the match positions.
- * Just report success.
- */
- return true;
- }
+ /* !L & !R: treat as !(L | R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ Loffset, Roffset,
+ Ldata.npos + Rdata.npos);
+ if (data)
+ data->negate = true;
+ return true;
+ }
+ else if (Ldata.negate)
+ {
+ /* !L & R */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_R_ONLY,
+ Loffset, Roffset,
+ Rdata.npos);
+ }
+ else if (Rdata.negate)
+ {
+ /* L & !R */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_L_ONLY,
+ Loffset, Roffset,
+ Ldata.npos);
+ }
+ else
+ {
+ /* straight AND */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH,
+ Loffset, Roffset,
+ Min(Ldata.npos, Rdata.npos));
+ }
- }
- else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) ||
- WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <
- curitem->qoperator.distance)
- {
- /*
- * Go to the next Rpos, because Lpos is ahead or on less
- * distance than required by current operator
- */
- break;
+ case OP_OR:
+ memset(&Ldata, 0, sizeof(Ldata));
+ memset(&Rdata, 0, sizeof(Rdata));
- }
+ lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
+ arg, flags, chkcond, &Ldata);
+ rmatch = TS_phrase_execute(curitem + 1,
+ arg, flags, chkcond, &Rdata);
- Lpos++;
- }
+ if (!lmatch && !rmatch)
+ return false;
- Rpos++;
- }
+ /*
+ * If a valid operand has no position information, then we can't
+ * return position data, only a "possible match" result. "Possible
+ * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
+ * is set, otherwise return false.
+ */
+ if ((lmatch && Ldata.npos == 0 && !Ldata.negate) ||
+ (rmatch && Rdata.npos == 0 && !Rdata.negate))
+ return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
- if (data)
- {
- data->npos = pos_iter - data->pos;
+ /*
+ * Cope with undefined output width from failed submatch. (This
+ * takes less code than trying to ensure that all failure returns
+ * set data->width to zero.)
+ */
+ if (!lmatch)
+ Ldata.width = 0;
+ if (!rmatch)
+ Rdata.width = 0;
- if (data->npos > 0)
+ /*
+ * For OP_AND and OP_OR, report the width of the wider of the two
+ * inputs, and align the narrower input's positions to the right
+ * end of that width. This rule deals at least somewhat
+ * reasonably with cases like "x <-> (y | z <-> q)".
+ */
+ maxwidth = Max(Ldata.width, Rdata.width);
+ Loffset = maxwidth - Ldata.width;
+ Roffset = maxwidth - Rdata.width;
+ data->width = maxwidth;
+
+ if (Ldata.negate && Rdata.negate)
+ {
+ /* !L | !R: treat as !(L & R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH,
+ Loffset, Roffset,
+ Min(Ldata.npos, Rdata.npos));
+ data->negate = true;
return true;
- }
+ }
+ else if (Ldata.negate)
+ {
+ /* !L | R: treat as !(L & !R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_L_ONLY,
+ Loffset, Roffset,
+ Ldata.npos);
+ data->negate = true;
+ return true;
+ }
+ else if (Rdata.negate)
+ {
+ /* L | !R: treat as !(!L & R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_R_ONLY,
+ Loffset, Roffset,
+ Rdata.npos);
+ data->negate = true;
+ return true;
+ }
+ else
+ {
+ /* straight OR */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ Loffset, Roffset,
+ Ldata.npos + Rdata.npos);
+ }
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
}
+ /* not reachable, but keep compiler quiet */
return false;
}
@@ -1594,12 +1842,7 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags,
return TS_execute(curitem + 1, arg, flags, chkcond);
case OP_PHRASE:
-
- /*
- * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
- * do something more if it's called from TS_phrase_execute()
- */
- return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
+ return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 1fbd983898..81602d1adc 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -113,8 +113,8 @@ extern text *generateHeadline(HeadlineParsedText *prs);
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need
* lexeme position data (because of a phrase-match operator in the tsquery).
* The callback should fill in position data when it returns true (success).
- * If it cannot return position data, it may ignore its "data" argument, but
- * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag
+ * If it cannot return position data, it may leave "data" unchanged, but
+ * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag
* and must arrange for a later recheck with position data available.
*
* The reported lexeme positions must be sorted and unique. Callers must only
@@ -123,13 +123,21 @@ extern text *generateHeadline(HeadlineParsedText *prs);
* portion of a tsvector value. If "allocated" is true then the pos array
* is palloc'd workspace and caller may free it when done.
*
+ * "negate" means that the pos array contains positions where the query does
+ * not match, rather than positions where it does. "width" is positive when
+ * the match is wider than one lexeme. Neither of these fields normally needs
+ * to be touched by TSExecuteCallback functions; they are used for
+ * phrase-search processing within TS_execute.
+ *
* All fields of the ExecPhraseData struct are initially zeroed by caller.
*/
typedef struct ExecPhraseData
{
int npos; /* number of positions reported */
bool allocated; /* pos points to palloc'd data? */
+ bool negate; /* positions are where query is NOT matched */
WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
+ int width; /* width of match in lexemes, less 1 */
} ExecPhraseData;
/*
@@ -139,7 +147,9 @@ typedef struct ExecPhraseData
* val: lexeme to test for presence of
* data: to be filled with lexeme positions; NULL if position data not needed
*
- * Return TRUE if lexeme is present in data, else FALSE
+ * Return TRUE if lexeme is present in data, else FALSE. If data is not
+ * NULL, it should be filled with lexeme positions, but the function can
+ * leave it as zeroes if position data is not available.
*/
typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
ExecPhraseData *data);
@@ -151,15 +161,18 @@ typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
/*
* If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
* evaluated to be true. Useful in cases where NOT cannot be accurately
- * computed (GiST) or it isn't important (ranking).
+ * computed (GiST) or it isn't important (ranking). From TS_execute's
+ * perspective, !CALC_NOT means that the TSExecuteCallback function might
+ * return false-positive indications of a lexeme's presence.
*/
#define TS_EXEC_CALC_NOT (0x01)
/*
- * Treat OP_PHRASE as OP_AND. Used when positional information is not
- * accessible, like in consistent methods of GIN/GiST indexes; rechecking
- * must occur later.
+ * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
+ * in the absence of position information: a TRUE result indicates that the
+ * phrase might be present. Without this flag, OP_PHRASE always returns
+ * false if lexeme position information is not available.
*/
-#define TS_EXEC_PHRASE_AS_AND (0x02)
+#define TS_EXEC_PHRASE_NO_POS (0x02)
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
TSExecuteCallback chkcond);
@@ -228,7 +241,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS);
* TSQuery Utilities
*/
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
-extern TSQuery cleanup_fakeval_and_phrase(TSQuery in);
+extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
typedef struct QTNode
{
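The callback contract above is visible at the SQL level: a stripped tsvector carries no lexeme positions, so a phrase query cannot be verified against it and does not match (a small illustration, mirroring the regression test added below):

    SELECT to_tsvector('simple', 'a b') @@ 'a <-> b'::tsquery;         -- t
    SELECT strip(to_tsvector('simple', 'a b')) @@ 'a <-> b'::tsquery;  -- f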
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out
index c55591a678..8ed64d3c68 100644
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
- to_tsquery
------------------------------------------------------------------
- 'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
+ to_tsquery
+-------------------------------------------------
+ ( 'foot':B & 'ball':B & 'klubber':B ) <-> 'sky'
(1 row)
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
- phraseto_tsquery
------------------------------------------------------------
- 'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
+ phraseto_tsquery
+-------------------------------------------
+ ( 'foot' & 'ball' & 'klubber' ) <-> 'sky'
(1 row)
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index cf3beb35a9..0681d43358 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -556,15 +556,15 @@ SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
-- Check stop word deletion, a and s are stop-words
SELECT to_tsquery('english', '!(a & !b) & c');
- to_tsquery
-------------
- 'b' & 'c'
+ to_tsquery
+-------------
+ !!'b' & 'c'
(1 row)
SELECT to_tsquery('english', '!(a & !b)');
to_tsquery
------------
- 'b'
+ !!'b'
(1 row)
SELECT to_tsquery('english', '(1 <-> 2) <-> a');
@@ -1240,15 +1240,15 @@ SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::t
(1 row)
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
- ts_rewrite
----------------------------------------
- '5' <-> '1' & '5' <-> ( '2' <-> '3' )
+ ts_rewrite
+-------------------------
+ '5' <-> ( '2' <-> '4' )
(1 row)
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
- ts_rewrite
----------------------------
- '5' <-> '7' | '5' <-> '8'
+ ts_rewrite
+-----------------------
+ '5' <-> ( '6' | '8' )
(1 row)
-- Check empty substitution
@@ -1386,6 +1386,26 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
(1 row)
+SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+ ts_rewrite
+-----------------------------------------
+ ( 'bar' | 'baz' ) <-> ( 'bar' | 'baz' )
+(1 row)
+
+SELECT to_tsvector('foo bar') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT to_tsvector('bar baz') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+ ?column?
+----------
+ t
+(1 row)
+
RESET enable_seqscan;
--test GUC
SET default_text_search_config=simple;
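For reference, the tsquery_phrase() function used in the new tests builds a phrase node between two queries, with an optional distance argument (a quick illustration; compare the tstypes expected output further below):

    SELECT tsquery_phrase('fat'::tsquery, 'cat'::tsquery);
    --   tsquery_phrase
    -- -----------------
    --  'fat' <-> 'cat'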
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index 8d9290cbac..dcce82fdc4 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -366,133 +366,6 @@ SELECT '!!a & !!b'::tsquery;
!!'a' & !!'b'
(1 row)
--- phrase transformation
-SELECT 'a <-> (b|c)'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'b' | 'a' <-> 'c'
-(1 row)
-
-SELECT '(a|b) <-> c'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'c' | 'b' <-> 'c'
-(1 row)
-
-SELECT '(a|b) <-> (d|c)'::tsquery;
- tsquery
--------------------------------------------------------
- 'a' <-> 'd' | 'b' <-> 'd' | 'a' <-> 'c' | 'b' <-> 'c'
-(1 row)
-
-SELECT 'a <-> (b&c)'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'b' & 'a' <-> 'c'
-(1 row)
-
-SELECT '(a&b) <-> c'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'c' & 'b' <-> 'c'
-(1 row)
-
-SELECT '(a&b) <-> (d&c)'::tsquery;
- tsquery
--------------------------------------------------------
- 'a' <-> 'd' & 'b' <-> 'd' & 'a' <-> 'c' & 'b' <-> 'c'
-(1 row)
-
-SELECT 'a <-> !b'::tsquery;
- tsquery
-------------------------
- 'a' & !( 'a' <-> 'b' )
-(1 row)
-
-SELECT '!a <-> b'::tsquery;
- tsquery
-------------------------
- !( 'a' <-> 'b' ) & 'b'
-(1 row)
-
-SELECT '!a <-> !b'::tsquery;
- tsquery
-------------------------------------
- !'a' & !( !( 'a' <-> 'b' ) & 'b' )
-(1 row)
-
-SELECT 'a <-> !(b&c)'::tsquery;
- tsquery
---------------------------------------
- 'a' & !( 'a' <-> 'b' & 'a' <-> 'c' )
-(1 row)
-
-SELECT 'a <-> !(b|c)'::tsquery;
- tsquery
---------------------------------------
- 'a' & !( 'a' <-> 'b' | 'a' <-> 'c' )
-(1 row)
-
-SELECT '!(a&b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' & 'b' <-> 'c' ) & 'c'
-(1 row)
-
-SELECT '!(a|b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' | 'b' <-> 'c' ) & 'c'
-(1 row)
-
-SELECT '(!a|b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' ) & 'c' | 'b' <-> 'c'
-(1 row)
-
-SELECT '(!a&b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' ) & 'c' & 'b' <-> 'c'
-(1 row)
-
-SELECT 'c <-> (!a|b)'::tsquery;
- tsquery
---------------------------------------
- 'c' & !( 'c' <-> 'a' ) | 'c' <-> 'b'
-(1 row)
-
-SELECT 'c <-> (!a&b)'::tsquery;
- tsquery
---------------------------------------
- 'c' & !( 'c' <-> 'a' ) & 'c' <-> 'b'
-(1 row)
-
-SELECT '(a|b) <-> !c'::tsquery;
- tsquery
-------------------------------------------------
- ( 'a' | 'b' ) & !( 'a' <-> 'c' | 'b' <-> 'c' )
-(1 row)
-
-SELECT '(a&b) <-> !c'::tsquery;
- tsquery
---------------------------------------------
- 'a' & 'b' & !( 'a' <-> 'c' & 'b' <-> 'c' )
-(1 row)
-
-SELECT '!c <-> (a|b)'::tsquery;
- tsquery
--------------------------------------------------
- !( 'c' <-> 'a' ) & 'a' | !( 'c' <-> 'b' ) & 'b'
-(1 row)
-
-SELECT '!c <-> (a&b)'::tsquery;
- tsquery
--------------------------------------------------
- !( 'c' <-> 'a' ) & 'a' & !( 'c' <-> 'b' ) & 'b'
-(1 row)
-
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
true
@@ -568,33 +441,33 @@ SELECT 'foo & bar'::tsquery && 'asd | fg';
(1 row)
SELECT 'a' <-> 'b & d'::tsquery;
- ?column?
----------------------------
- 'a' <-> 'b' & 'a' <-> 'd'
+ ?column?
+-----------------------
+ 'a' <-> ( 'b' & 'd' )
(1 row)
SELECT 'a & g' <-> 'b & d'::tsquery;
- ?column?
--------------------------------------------------------
- 'a' <-> 'b' & 'g' <-> 'b' & 'a' <-> 'd' & 'g' <-> 'd'
+ ?column?
+---------------------------------
+ ( 'a' & 'g' ) <-> ( 'b' & 'd' )
(1 row)
SELECT 'a & g' <-> 'b | d'::tsquery;
- ?column?
--------------------------------------------------------
- 'a' <-> 'b' & 'g' <-> 'b' | 'a' <-> 'd' & 'g' <-> 'd'
+ ?column?
+---------------------------------
+ ( 'a' & 'g' ) <-> ( 'b' | 'd' )
(1 row)
SELECT 'a & g' <-> 'b <-> d'::tsquery;
- ?column?
----------------------------------------------------
- 'a' <-> ( 'b' <-> 'd' ) & 'g' <-> ( 'b' <-> 'd' )
+ ?column?
+-----------------------------------
+ ( 'a' & 'g' ) <-> ( 'b' <-> 'd' )
(1 row)
SELECT tsquery_phrase('a <3> g', 'b & d', 10);
- tsquery_phrase
----------------------------------------------
- 'a' <3> 'g' <10> 'b' & 'a' <3> 'g' <10> 'd'
+ tsquery_phrase
+--------------------------------
+ 'a' <3> 'g' <10> ( 'b' & 'd' )
(1 row)
-- tsvector-tsquery operations
@@ -749,25 +622,152 @@ SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
t
(1 row)
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false";
false
-------
f
(1 row)
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
true
------
t
(1 row)
-SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
true
------
t
(1 row)
-SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+-- without position data, phrase search does not match
+SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x y q') @@ '(!x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
true
------
t
@@ -1002,6 +1002,12 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
f
(1 row)
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
-- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index de43860c70..1255f6954d 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -447,6 +447,12 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
+SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+SELECT to_tsvector('foo bar') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+SELECT to_tsvector('bar baz') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+
RESET enable_seqscan;
--test GUC
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index 9ea93a2993..77436ce04e 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -64,34 +64,6 @@ SELECT 'a & !!b'::tsquery;
SELECT '!!a & b'::tsquery;
SELECT '!!a & !!b'::tsquery;
--- phrase transformation
-SELECT 'a <-> (b|c)'::tsquery;
-SELECT '(a|b) <-> c'::tsquery;
-SELECT '(a|b) <-> (d|c)'::tsquery;
-
-SELECT 'a <-> (b&c)'::tsquery;
-SELECT '(a&b) <-> c'::tsquery;
-SELECT '(a&b) <-> (d&c)'::tsquery;
-
-SELECT 'a <-> !b'::tsquery;
-SELECT '!a <-> b'::tsquery;
-SELECT '!a <-> !b'::tsquery;
-
-SELECT 'a <-> !(b&c)'::tsquery;
-SELECT 'a <-> !(b|c)'::tsquery;
-SELECT '!(a&b) <-> c'::tsquery;
-SELECT '!(a|b) <-> c'::tsquery;
-
-SELECT '(!a|b) <-> c'::tsquery;
-SELECT '(!a&b) <-> c'::tsquery;
-SELECT 'c <-> (!a|b)'::tsquery;
-SELECT 'c <-> (!a&b)'::tsquery;
-
-SELECT '(a|b) <-> !c'::tsquery;
-SELECT '(a&b) <-> !c'::tsquery;
-SELECT '!c <-> (a|b)'::tsquery;
-SELECT '!c <-> (a&b)'::tsquery;
-
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false";
@@ -146,10 +118,33 @@ SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false";
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+-- without position data, phrase search does not match
+SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false";
+
+select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false";
+select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true";
+select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false";
+select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true";
+select to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false";
+select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true";
+select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'x y q') @@ '(!x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true";
+select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
+select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
@@ -193,6 +188,7 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true";
-- tsvector editing operations
--
cgit v1.2.3
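In the new scheme, 'a <N> b' requires b to occur exactly N positions after a; <-> is shorthand for <1>, and <0> lets both operands match at the same position, which pairs naturally with prefix matching as in the test just above:

    SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery;  -- t
    SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery;  -- f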
From 4032ef18d06aa7a3db515926ddebe8af04e533fe Mon Sep 17 00:00:00 2001
From: Michael Meskes
Date: Thu, 22 Dec 2016 08:28:13 +0100
Subject: Fix buffer overflow on particularly named files and clarify
documentation about output file naming.
Patch by Tsunakawa, Takayuki
---
doc/src/sgml/ref/ecpg-ref.sgml | 8 +++-----
src/interfaces/ecpg/preproc/ecpg.c | 3 ++-
2 files changed, 5 insertions(+), 6 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/ecpg-ref.sgml b/doc/src/sgml/ref/ecpg-ref.sgml
index 029bd4a4d2..8bfb47c4d7 100644
--- a/doc/src/sgml/ref/ecpg-ref.sgml
+++ b/doc/src/sgml/ref/ecpg-ref.sgml
@@ -42,11 +42,9 @@ PostgreSQL documentation
ecpg will convert each input file given on the
command line to the corresponding C output file. Input files
- preferably have the extension .pgc, in which
- case the extension will be replaced by .c to
- determine the output file name. If the extension of the input file
- is not .pgc, then the output file name is
- computed by appending .c to the full file name.
+ preferably have the extension .pgc.
+ The extension will be replaced by .c to
+ determine the output file name.
The output file name can also be overridden using the
-o
option.
diff --git a/src/interfaces/ecpg/preproc/ecpg.c b/src/interfaces/ecpg/preproc/ecpg.c
index 3b0de97321..59dce140a9 100644
--- a/src/interfaces/ecpg/preproc/ecpg.c
+++ b/src/interfaces/ecpg/preproc/ecpg.c
@@ -313,7 +313,8 @@ main(int argc, char *const argv[])
base_yyout = stdout;
else
{
- output_filename = mm_strdup(input_filename);
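+ /* +3: room to append ".c"/".h" (plus the NUL) when the input name lacks a replaceable extension */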
+ output_filename = mm_alloc(strlen(input_filename) + 3);
+ strcpy(output_filename, input_filename);
ptr2ext = strrchr(output_filename, '.');
/* make extension = .c resp. .h */
--
cgit v1.2.3
From 6ef2eba3f57f17960b7cd4958e18aa79e357de2f Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Thu, 22 Dec 2016 11:31:50 -0800
Subject: Skip checkpoints, archiving on idle systems.
Some background activity (like checkpoints, archive-timeout segment
switches, and standby snapshots) is not supposed to happen on an idle
system. Unfortunately, so far it was not easy to determine when a system
is idle, which
defeated some of the attempts to avoid redundant activity on an idle
system.
To make that easier, allow individual WAL insertions to be marked as not
being "important". By checking whether any important WAL activity has
happened since the last time a given action was performed, it is now easy
to tell whether that action needs to be repeated.
Use the new facility for checkpoints, archive timeout and standby
snapshots.
The lack of such a facility causes some issues in older releases, but in my
opinion the consequences (superfluous checkpoints / archived segments)
aren't grave enough to warrant backpatching.
Author: Michael Paquier, editorialized by Andres Freund
Reviewed-By: Andres Freund, David Steele, Amit Kapila, Kyotaro HORIGUCHI
Bug: #13685
Discussion:
https://www.postgresql.org/message-id/20151016203031.3019.72930@wrigleys.postgresql.org
https://www.postgresql.org/message-id/CAB7nPqQcPqxEM3S735Bd2RzApNqSNJVietAC=6kfkYv_45dKwA@mail.gmail.com
Backpatch: -
---
doc/src/sgml/config.sgml | 10 +--
src/backend/access/heap/heapam.c | 10 +--
src/backend/access/transam/xact.c | 2 +-
src/backend/access/transam/xlog.c | 118 +++++++++++++++++++++++-------
src/backend/access/transam/xlogfuncs.c | 2 +-
src/backend/access/transam/xloginsert.c | 24 ++++--
src/backend/postmaster/bgwriter.c | 8 +-
src/backend/postmaster/checkpointer.c | 45 ++++++++----
src/backend/replication/logical/message.c | 2 +-
src/backend/storage/ipc/standby.c | 11 ++-
src/include/access/xlog.h | 12 ++-
src/include/access/xlog_internal.h | 4 +-
src/include/access/xloginsert.h | 2 +-
13 files changed, 173 insertions(+), 77 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 1b98c416e0..b6b20a368e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2852,12 +2852,10 @@ include_dir 'conf.d'
parameter is greater than zero, the server will switch to a new
segment file whenever this many seconds have elapsed since the last
segment file switch, and there has been any database activity,
- including a single checkpoint. (Increasing
- checkpoint_timeout> will reduce unnecessary
- checkpoints on an idle system.)
- Note that archived files that are closed early
- due to a forced switch are still the same length as completely full
- files. Therefore, it is unwise to use a very short
+ including a single checkpoint (checkpoints are skipped if there is
+ no database activity). Note that archived files that are closed
+ early due to a forced switch are still the same length as completely
+ full files. Therefore, it is unwise to use a very short
archive_timeout> — it will bloat your archive
storage. archive_timeout> settings of a minute or so are
usually reasonable. You should consider using streaming replication,
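A hedged example of that guidance (values illustrative; archive_timeout takes effect on a configuration reload):

    ALTER SYSTEM SET archive_timeout = '60s';
    SELECT pg_reload_conf();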
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b019bc1a0d..ea579a00be 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2507,7 +2507,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
heaptup->t_len - SizeofHeapTupleHeader);
/* filtering by origin on a row level is much more efficient */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP_ID, info);
@@ -2846,7 +2846,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
XLogRegisterBufData(0, tupledata, totaldatalen);
/* filtering by origin on a row level is much more efficient */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP2_ID, info);
@@ -3308,7 +3308,7 @@ l1:
}
/* filtering by origin on a row level is much more efficient */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
@@ -6035,7 +6035,7 @@ heap_finish_speculative(Relation relation, HeapTuple tuple)
XLogBeginInsert();
/* We want the same filtering on this as on a plain insert */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
@@ -7703,7 +7703,7 @@ log_heap_update(Relation reln, Buffer oldbuf,
}
/* filtering by origin on a row level is much more efficient */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
recptr = XLogInsert(RM_HEAP_ID, info);
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index d6432165f1..e47fd4497e 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -5234,7 +5234,7 @@ XactLogCommitRecord(TimestampTz commit_time,
XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin));
/* we allow filtering by xacts */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
return XLogInsert(RM_XACT_ID, info);
}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index aa9ee5a0dd..f8ffa5c45c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -442,11 +442,21 @@ typedef struct XLogwrtResult
* the WAL record is just copied to the page and the lock is released. But
* to avoid the deadlock-scenario explained above, the indicator is always
* updated before sleeping while holding an insertion lock.
+ *
+ * lastImportantAt contains the LSN of the last important WAL record inserted
+ * using a given lock. This value is used to detect whether there has been
+ * important WAL activity since the last time some action, like a checkpoint,
+ * was performed, allowing that action to be skipped when nothing important
+ * has happened. The LSN is updated for all insertions, unless the
+ * XLOG_MARK_UNIMPORTANT flag was set. lastImportantAt is never cleared, only
+ * overwritten by the LSN of newer
+ * records. Tracking the WAL activity directly in WALInsertLock has the
+ * advantage of not needing any additional locks to update the value.
*/
typedef struct
{
LWLock lock;
XLogRecPtr insertingAt;
+ XLogRecPtr lastImportantAt;
} WALInsertLock;
/*
@@ -541,8 +551,9 @@ typedef struct XLogCtlData
XLogRecPtr unloggedLSN;
slock_t ulsn_lck;
- /* Time of last xlog segment switch. Protected by WALWriteLock. */
+ /* Time and LSN of last xlog segment switch. Protected by WALWriteLock. */
pg_time_t lastSegSwitchTime;
+ XLogRecPtr lastSegSwitchLSN;
/*
* Protected by info_lck and WALWriteLock (you must hold either lock to
@@ -884,6 +895,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
* which pages need a full-page image, and retry. If fpw_lsn is invalid, the
* record is always inserted.
*
+ * 'flags' gives more in-depth control over the record being inserted. See
+ * XLogSetRecordFlags() for details.
+ *
* The first XLogRecData in the chain must be for the record header, and its
* data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and
* xl_crc fields in the header, the rest of the header must already be filled
@@ -896,7 +910,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
* WAL rule "write the log before the data".)
*/
XLogRecPtr
-XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
+XLogInsertRecord(XLogRecData *rdata,
+ XLogRecPtr fpw_lsn,
+ uint8 flags)
{
XLogCtlInsert *Insert = &XLogCtl->Insert;
pg_crc32c rdata_crc;
@@ -1013,6 +1029,18 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
*/
CopyXLogRecordToWAL(rechdr->xl_tot_len, isLogSwitch, rdata,
StartPos, EndPos);
+
+ /*
+ * Unless record is flagged as not important, update LSN of last
+ * important record in the current slot. When holding all locks, just
+ * update the first one.
+ */
+ if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
+ {
+ int lockno = holdingAllLocks ? 0 : MyLockNo;
+
+ WALInsertLocks[lockno].l.lastImportantAt = StartPos;
+ }
}
else
{
@@ -2332,6 +2360,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
XLogArchiveNotifySeg(openLogSegNo);
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
+ XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
/*
* Request a checkpoint if we've consumed too much xlog since
@@ -4715,6 +4744,7 @@ XLOGShmemInit(void)
{
LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr;
+ WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
}
/*
@@ -7431,8 +7461,9 @@ StartupXLOG(void)
*/
InRecovery = false;
- /* start the archive_timeout timer running */
+ /* start the archive_timeout timer and LSN running */
XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
+ XLogCtl->lastSegSwitchLSN = EndOfLog;
/* also initialize latestCompletedXid, to nextXid - 1 */
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
@@ -7994,16 +8025,51 @@ GetFlushRecPtr(void)
}
/*
- * Get the time of the last xlog segment switch
+ * GetLastImportantRecPtr -- Returns the LSN of the last important record
+ * inserted. All records not explicitly marked as unimportant are considered
+ * important.
+ *
+ * The LSN is determined by computing the maximum of
+ * WALInsertLocks[i].lastImportantAt.
+ */
+XLogRecPtr
+GetLastImportantRecPtr(void)
+{
+ XLogRecPtr res = InvalidXLogRecPtr;
+ int i;
+
+ for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
+ {
+ XLogRecPtr last_important;
+
+ /*
+ * Need to take a lock to prevent torn reads of the LSN, which are
+ * possible on some of the supported platforms. WAL insert locks only
+ * support exclusive mode, so we have to use that.
+ */
+ LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
+ last_important = WALInsertLocks[i].l.lastImportantAt;
+ LWLockRelease(&WALInsertLocks[i].l.lock);
+
+ if (res < last_important)
+ res = last_important;
+ }
+
+ return res;
+}
+
+/*
+ * Get the time and LSN of the last xlog segment switch
*/
pg_time_t
-GetLastSegSwitchTime(void)
+GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN)
{
pg_time_t result;
/* Need WALWriteLock, but shared lock is sufficient */
LWLockAcquire(WALWriteLock, LW_SHARED);
result = XLogCtl->lastSegSwitchTime;
+ *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
LWLockRelease(WALWriteLock);
return result;
@@ -8065,7 +8131,7 @@ ShutdownXLOG(int code, Datum arg)
* record will go to the next XLOG file and won't be archived (yet).
*/
if (XLogArchivingActive() && XLogArchiveCommandSet())
- RequestXLogSwitch();
+ RequestXLogSwitch(false);
CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
}
@@ -8253,7 +8319,7 @@ CreateCheckPoint(int flags)
uint32 freespace;
XLogRecPtr PriorRedoPtr;
XLogRecPtr curInsert;
- XLogRecPtr prevPtr;
+ XLogRecPtr last_important_lsn;
VirtualTransactionId *vxids;
int nvxids;
@@ -8333,39 +8399,34 @@ CreateCheckPoint(int flags)
else
checkPoint.oldestActiveXid = InvalidTransactionId;
+ /*
+ * Get the location of the last important record before acquiring the
+ * insert locks (as GetLastImportantRecPtr() acquires the WAL insert locks
+ * itself).
+ */
+ last_important_lsn = GetLastImportantRecPtr();
+
/*
* We must block concurrent insertions while examining insert state to
* determine the checkpoint REDO pointer.
*/
WALInsertLockAcquireExclusive();
curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
- prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos);
/*
- * If this isn't a shutdown or forced checkpoint, and we have not inserted
- * any XLOG records since the start of the last checkpoint, skip the
- * checkpoint. The idea here is to avoid inserting duplicate checkpoints
- * when the system is idle. That wastes log space, and more importantly it
- * exposes us to possible loss of both current and previous checkpoint
- * records if the machine crashes just as we're writing the update.
- * (Perhaps it'd make even more sense to checkpoint only when the previous
- * checkpoint record is in a different xlog page?)
- *
- * If the previous checkpoint crossed a WAL segment, however, we create
- * the checkpoint anyway, to have the latest checkpoint fully contained in
- * the new segment. This is for a little bit of extra robustness: it's
- * better if you don't need to keep two WAL segments around to recover the
- * checkpoint.
+ * If this isn't a shutdown or forced checkpoint, and if there has been no
+ * WAL activity requiring a checkpoint, skip it. The idea here is to
+ * avoid inserting duplicate checkpoints when the system is idle.
*/
if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
CHECKPOINT_FORCE)) == 0)
{
- if (prevPtr == ControlFile->checkPointCopy.redo &&
- prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
+ if (last_important_lsn == ControlFile->checkPoint)
{
WALInsertLockRelease();
LWLockRelease(CheckpointLock);
END_CRIT_SECTION();
+ ereport(DEBUG1,
+ (errmsg("checkpoint skipped due to an idle system")));
return;
}
}
@@ -9122,12 +9183,15 @@ XLogPutNextOid(Oid nextOid)
* write a switch record because we are already at segment start.
*/
XLogRecPtr
-RequestXLogSwitch(void)
+RequestXLogSwitch(bool mark_unimportant)
{
XLogRecPtr RecPtr;
/* XLOG SWITCH has no data */
XLogBeginInsert();
+
+ if (mark_unimportant)
+ XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
return RecPtr;
@@ -9997,7 +10061,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
* recovery case described above.
*/
if (!backup_started_in_recovery)
- RequestXLogSwitch();
+ RequestXLogSwitch(false);
do
{
@@ -10582,7 +10646,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
* Force a switch to a new xlog segment file, so that the backup is valid
* as soon as archiver moves out the current segment file.
*/
- RequestXLogSwitch();
+ RequestXLogSwitch(false);
XLByteToPrevSeg(stoppoint, _logSegNo);
XLogFileName(stopxlogfilename, ThisTimeLineID, _logSegNo);
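The skip path can be observed at the SQL level; a rough sketch, assuming an otherwise idle cluster and superuser access:

    ALTER SYSTEM SET log_min_messages = 'debug1';
    SELECT pg_reload_conf();
    -- once checkpoint_timeout elapses with no important WAL inserted,
    -- the server log should show:
    --   DEBUG:  checkpoint skipped due to an idle system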
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 01cbd90f40..bc7253fc9b 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -293,7 +293,7 @@ pg_switch_xlog(PG_FUNCTION_ARGS)
errmsg("recovery is in progress"),
errhint("WAL control functions cannot be executed during recovery.")));
- switchpoint = RequestXLogSwitch();
+ switchpoint = RequestXLogSwitch(false);
/*
* As a convenience, return the WAL location of the switch record
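For reference, this function is exposed at the SQL level; the LSN shown below is illustrative:

    SELECT pg_switch_xlog();
    --  pg_switch_xlog
    -- ----------------
    --  0/16E8B40
    -- (1 row)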
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 3cd273b19f..24e35a3845 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -73,8 +73,8 @@ static XLogRecData *mainrdata_head;
static XLogRecData *mainrdata_last = (XLogRecData *) &mainrdata_head;
static uint32 mainrdata_len; /* total # of bytes in chain */
-/* Should the in-progress insertion log the origin? */
-static bool include_origin = false;
+/* flags for the in-progress insertion */
+static uint8 curinsert_flags = 0;
/*
* These are used to hold the record header while constructing a record.
@@ -201,7 +201,7 @@ XLogResetInsertion(void)
max_registered_block_id = 0;
mainrdata_len = 0;
mainrdata_last = (XLogRecData *) &mainrdata_head;
- include_origin = false;
+ curinsert_flags = 0;
begininsert_called = false;
}
@@ -384,13 +384,20 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
}
/*
- * Should this record include the replication origin if one is set up?
+ * Set insert status flags for the upcoming WAL record.
+ *
+ * The flags that can be used here are:
+ * - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
+ * included in the record.
+ * - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
+ * durability, which makes it possible to avoid triggering WAL archiving
+ * and other background activity.
*/
void
-XLogIncludeOrigin(void)
+XLogSetRecordFlags(uint8 flags)
{
Assert(begininsert_called);
- include_origin = true;
+ curinsert_flags = flags;
}
/*
@@ -450,7 +457,7 @@ XLogInsert(RmgrId rmid, uint8 info)
rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites,
&fpw_lsn);
- EndPos = XLogInsertRecord(rdt, fpw_lsn);
+ EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
} while (EndPos == InvalidXLogRecPtr);
XLogResetInsertion();
@@ -701,7 +708,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
}
/* followed by the record's origin, if any */
- if (include_origin && replorigin_session_origin != InvalidRepOriginId)
+ if ((curinsert_flags & XLOG_INCLUDE_ORIGIN) &&
+ replorigin_session_origin != InvalidRepOriginId)
{
*(scratch++) = XLR_BLOCK_ID_ORIGIN;
memcpy(scratch, &replorigin_session_origin, sizeof(replorigin_session_origin));
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index a31d44e799..25020ab3b8 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -310,7 +310,7 @@ BackgroundWriterMain(void)
* check whether there has been any WAL inserted since the last time
* we've logged a running xacts.
*
- * We do this logging in the bgwriter as its the only process that is
+ * We do this logging in the bgwriter as it is the only process that is
* run regularly and returns to its mainloop all the time. E.g.
* Checkpointer, when active, is barely ever in its mainloop and thus
* makes it hard to log regularly.
@@ -324,11 +324,11 @@ BackgroundWriterMain(void)
LOG_SNAPSHOT_INTERVAL_MS);
/*
- * only log if enough time has passed and some xlog record has
- * been inserted.
+ * Only log if enough time has passed and interesting records have
+ * been inserted since the last snapshot.
*/
if (now >= timeout &&
- last_snapshot_lsn != GetXLogInsertRecPtr())
+ last_snapshot_lsn < GetLastImportantRecPtr())
{
last_snapshot_lsn = LogStandbySnapshot();
last_snapshot_ts = now;
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 92b0a9416d..c875f40ece 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -573,15 +573,21 @@ CheckpointerMain(void)
/*
* CheckArchiveTimeout -- check for archive_timeout and switch xlog files
*
- * This will switch to a new WAL file and force an archive file write
- * if any activity is recorded in the current WAL file, including just
- * a single checkpoint record.
+ * This will switch to a new WAL file and force an archive file write if
+ * meaningful activity is recorded in the current WAL file. This covers most
+ * writes, even just a single checkpoint record, but excludes WAL records
+ * that were inserted with the XLOG_MARK_UNIMPORTANT flag set (like
+ * snapshots of running transactions). Such records, depending on
+ * configuration, occur at regular intervals and don't contain important
+ * information. This avoids generating archives containing only a few
+ * unimportant records.
*/
static void
CheckArchiveTimeout(void)
{
pg_time_t now;
pg_time_t last_time;
+ XLogRecPtr last_switch_lsn;
if (XLogArchiveTimeout <= 0 || RecoveryInProgress())
return;
@@ -596,26 +602,33 @@ CheckArchiveTimeout(void)
* Update local state ... note that last_xlog_switch_time is the last time
* a switch was performed *or requested*.
*/
- last_time = GetLastSegSwitchTime();
+ last_time = GetLastSegSwitchData(&last_switch_lsn);
last_xlog_switch_time = Max(last_xlog_switch_time, last_time);
- /* Now we can do the real check */
+ /* Now we can do the real checks */
if ((int) (now - last_xlog_switch_time) >= XLogArchiveTimeout)
{
- XLogRecPtr switchpoint;
-
- /* OK, it's time to switch */
- switchpoint = RequestXLogSwitch();
-
/*
- * If the returned pointer points exactly to a segment boundary,
- * assume nothing happened.
+ * Switch segment only when "important" WAL has been logged since the
+ * last segment switch.
*/
- if ((switchpoint % XLogSegSize) != 0)
- ereport(DEBUG1,
- (errmsg("transaction log switch forced (archive_timeout=%d)",
- XLogArchiveTimeout)));
+ if (GetLastImportantRecPtr() > last_switch_lsn)
+ {
+ XLogRecPtr switchpoint;
+
+ /* mark switch as unimportant, avoids triggering checkpoints */
+ switchpoint = RequestXLogSwitch(true);
+
+ /*
+ * If the returned pointer points exactly to a segment boundary,
+ * assume nothing happened.
+ */
+ if ((switchpoint % XLogSegSize) != 0)
+ ereport(DEBUG1,
+ (errmsg("transaction log switch forced (archive_timeout=%d)",
+ XLogArchiveTimeout)));
+ }
/*
* Update state in any case, so we don't retry constantly when the
diff --git a/src/backend/replication/logical/message.c b/src/backend/replication/logical/message.c
index 8f9dc2f47c..2211a4b223 100644
--- a/src/backend/replication/logical/message.c
+++ b/src/backend/replication/logical/message.c
@@ -73,7 +73,7 @@ LogLogicalMessage(const char *prefix, const char *message, size_t size,
XLogRegisterData((char *) message, size);
/* allow origin filtering */
- XLogIncludeOrigin();
+ XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
return XLogInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE);
}
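LogLogicalMessage() is reachable from SQL through pg_logical_emit_message(); a minimal sketch (decoding the message additionally requires wal_level = logical and a logical replication slot):

    SELECT pg_logical_emit_message(true, 'test_prefix', 'payload');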
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 875dcec1eb..112fe07677 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -961,10 +961,11 @@ LogStandbySnapshot(void)
/*
* Record an enhanced snapshot of running transactions into WAL.
*
- * The definitions of RunningTransactionsData and xl_xact_running_xacts
- * are similar. We keep them separate because xl_xact_running_xacts
- * is a contiguous chunk of memory and never exists fully until it is
- * assembled in WAL.
+ * The definitions of RunningTransactionsData and xl_xact_running_xacts are
+ * similar. We keep them separate because xl_xact_running_xacts is a
+ * contiguous chunk of memory and never exists fully until it is assembled in
+ * WAL. The inserted records are marked as not being important for durability,
+ * to avoid triggering superfluous checkpoint / archiving activity.
*/
static XLogRecPtr
LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
@@ -981,6 +982,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
/* Header */
XLogBeginInsert();
+ XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
XLogRegisterData((char *) (&xlrec), MinSizeOfXactRunningXacts);
/* array of TransactionIds */
@@ -1035,6 +1037,7 @@ LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks)
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, offsetof(xl_standby_locks, locks));
XLogRegisterData((char *) locks, nlocks * sizeof(xl_standby_lock));
+ XLogSetRecordFlags(XLOG_MARK_UNIMPORTANT);
(void) XLogInsert(RM_STANDBY_ID, XLOG_STANDBY_LOCK);
}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index c9f332c908..7d21408c4a 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -184,6 +184,13 @@ extern bool XLOG_DEBUG;
#define CHECKPOINT_CAUSE_XLOG 0x0040 /* XLOG consumption */
#define CHECKPOINT_CAUSE_TIME 0x0080 /* Elapsed time */
+/*
+ * Flag bits for the record being inserted, set using XLogSetRecordFlags().
+ */
+#define XLOG_INCLUDE_ORIGIN 0x01 /* include the replication origin */
+#define XLOG_MARK_UNIMPORTANT 0x02 /* record not important for durability */
+
+
/* Checkpoint statistics */
typedef struct CheckpointStatsData
{
@@ -211,7 +218,9 @@ extern CheckpointStatsData CheckpointStats;
struct XLogRecData;
-extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata, XLogRecPtr fpw_lsn);
+extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
+ XLogRecPtr fpw_lsn,
+ uint8 flags);
extern void XLogFlush(XLogRecPtr RecPtr);
extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
@@ -262,6 +271,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
extern XLogRecPtr GetRedoRecPtr(void);
extern XLogRecPtr GetInsertRecPtr(void);
extern XLogRecPtr GetFlushRecPtr(void);
+extern XLogRecPtr GetLastImportantRecPtr(void);
extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
extern void RemovePromoteSignalFiles(void);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index ceb0462098..05f996b127 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -283,8 +283,8 @@ extern const RmgrData RmgrTable[];
/*
* Exported to support xlog switching from checkpointer
*/
-extern pg_time_t GetLastSegSwitchTime(void);
-extern XLogRecPtr RequestXLogSwitch(void);
+extern pg_time_t GetLastSegSwitchData(XLogRecPtr *lastSwitchLSN);
+extern XLogRecPtr RequestXLogSwitch(bool mark_unimportant);
extern void GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli);
diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h
index cc0177ef4e..307cfaaf47 100644
--- a/src/include/access/xloginsert.h
+++ b/src/include/access/xloginsert.h
@@ -40,7 +40,7 @@
/* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void);
-extern void XLogIncludeOrigin(void);
+extern void XLogSetRecordFlags(uint8 flags);
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info);
extern void XLogEnsureRecordSpace(int nbuffers, int ndatas);
extern void XLogRegisterData(char *data, int len);
--
cgit v1.2.3
From 909cb78a8cce73c9678ddb3269b56e31b56e084b Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 22 Dec 2016 12:00:00 -0500
Subject: doc: Further speed improvements for HTML XSLT build
---
doc/src/sgml/stylesheet-speedup-common.xsl | 6 ++++
doc/src/sgml/stylesheet-speedup-xhtml.xsl | 50 ++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/stylesheet-speedup-common.xsl b/doc/src/sgml/stylesheet-speedup-common.xsl
index 007fdf6d63..e3fb582a1c 100644
--- a/doc/src/sgml/stylesheet-speedup-common.xsl
+++ b/doc/src/sgml/stylesheet-speedup-common.xsl
@@ -91,4 +91,10 @@
+ [XSL markup lost in extraction; only the literal text "en" survives]
diff --git a/doc/src/sgml/stylesheet-speedup-xhtml.xsl b/doc/src/sgml/stylesheet-speedup-xhtml.xsl
index ff08bef808..da0f2b5a97 100644
--- a/doc/src/sgml/stylesheet-speedup-xhtml.xsl
+++ b/doc/src/sgml/stylesheet-speedup-xhtml.xsl
@@ -251,6 +251,56 @@
--
cgit v1.2.3
From 0a85c102254b72ec7ce16bc504206a1a5c84bd76 Mon Sep 17 00:00:00 2001
From: Joe Conway
Date: Thu, 22 Dec 2016 17:56:50 -0800
Subject: Improve RLS documentation with respect to COPY
Documentation for pg_restore said COPY TO does not support row security
when in fact it should say COPY FROM. Fix that.
While at it, make it clear that COPY FROM is not supported on tables with
row-level security enabled and that INSERT should be used instead. Also
note that SELECT policies will apply to COPY TO statements.
Back-patch to 9.5 where RLS first appeared.
Author: Joe Conway
Reviewed-By: Dean Rasheed and Robert Haas
Discussion: https://postgr.es/m/5744FA24.3030008%40joeconway.com
---
doc/src/sgml/ref/copy.sgml | 9 +++++++++
doc/src/sgml/ref/pg_dump.sgml | 5 +++++
doc/src/sgml/ref/pg_restore.sgml | 2 +-
3 files changed, 15 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 2477a872e8..7ff62f2a82 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -424,6 +424,15 @@ COPY count
to have column privileges on the column(s) listed in the command.
+
+ If row-level security is enabled for the table, the relevant
+ SELECT policies will apply to COPY
+ table> TO statements.
+ Currently, COPY FROM is not supported for tables
+ with row-level security. Use equivalent INSERT
+ statements instead.
+
+
Files named in a COPY command are read or written
directly by the server, not by the client application. Therefore,
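A minimal sketch of the documented behavior (table, policy, and data are illustrative; RLS is enforced for a non-owner role without BYPASSRLS):

    CREATE TABLE docs (owner name, body text);
    ALTER TABLE docs ENABLE ROW LEVEL SECURITY;
    CREATE POLICY docs_sel ON docs FOR SELECT USING (owner = current_user);
    -- SELECT policies filter what COPY ... TO emits:
    COPY docs TO STDOUT;
    -- COPY ... FROM is rejected on tables with row-level security; use INSERT:
    INSERT INTO docs VALUES (current_user, 'hello');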
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index f6225d23c8..b70e7d57e9 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -718,6 +718,11 @@ PostgreSQL documentation
to dump the parts of the contents of the table that they have access to.
+
+ Note that if you use this option currently, you probably also want
+ the dump to be in INSERT format, as the
+ COPY FROM used during restore does not support row security.
+
diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index bd5b405314..44f0515066 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -543,7 +543,7 @@
Note that this option currently also requires the dump be in INSERT
- format, as COPY TO does not support row security.
+ format, as COPY FROM does not support row security.
--
cgit v1.2.3
From e13486eba05cc46951a34263d19b65d1eca0176b Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Fri, 23 Dec 2016 07:35:01 -0500
Subject: Remove sql_inheritance GUC.
This backward-compatibility GUC is long overdue for removal.
Discussion: http://postgr.es/m/CA+TgmoYe+EG7LdYX6pkcNxr4ygkP4+A=jm9o-CPXyOvRiCNwaQ@mail.gmail.com
---
doc/src/sgml/config.sgml | 30 ---------------------------
doc/src/sgml/ddl.sgml | 8 +++----
doc/src/sgml/queries.sgml | 8 +++----
src/backend/commands/lockcmds.c | 2 +-
src/backend/commands/tablecmds.c | 8 +++----
src/backend/nodes/makefuncs.c | 2 +-
src/backend/parser/analyze.c | 4 ++--
src/backend/parser/gram.y | 9 ++++----
src/backend/parser/parse_clause.c | 26 +----------------------
src/backend/utils/misc/guc.c | 10 ---------
src/backend/utils/misc/postgresql.conf.sample | 1 -
src/include/nodes/primnodes.h | 3 +--
src/include/parser/parse_clause.h | 1 -
src/include/utils/guc.h | 1 -
14 files changed, 20 insertions(+), 93 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b6b20a368e..8d7b3bfd66 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -7394,36 +7394,6 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
-
- sql_inheritance (boolean)
-
- sql_inheritance> configuration parameter
-
- inheritance>>
-
-
-
- This setting controls whether undecorated table references are
- considered to include inheritance child tables. The default is
- on>, which means child tables are included (thus,
- a *> suffix is assumed by default). If turned
- off>, child tables are not included (thus, an
- ONLY prefix is assumed). The SQL standard
- requires child tables to be included, so the off> setting
- is not spec-compliant, but it is provided for compatibility with
- PostgreSQL> releases prior to 7.1.
- See for more information.
-
-
-
- Turning sql_inheritance> off is deprecated, because that
- behavior has been found to be error-prone as well as contrary to SQL
- standard. Discussions of inheritance behavior elsewhere in this
- manual generally assume that it is on>.
-
-
-
-
standard_conforming_strings (boolean)
strings>standard conforming>>
diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 7e1bc0e534..d7117cbc8f 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -2529,11 +2529,9 @@ SELECT name, altitude
WHERE altitude > 500;
- Writing *> is not necessary, since this behavior is
- the default (unless you have changed the setting of the
- configuration option).
- However writing *> might be useful to emphasize that
- additional tables will be searched.
+ Writing *> is not necessary, since this behavior is always
+ the default. However, this syntax is still supported for
+ compatibility with older releases where the default could be changed.
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 5cc6dbce11..0f84c12bec 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -145,11 +145,9 @@ FROM table_reference, table_r
Instead of writing ONLY> before the table name, you can write
*> after the table name to explicitly specify that descendant
- tables are included. Writing *> is not necessary since that
- behavior is the default (unless you have changed the setting of the configuration option). However writing
- *> might be useful to emphasize that additional tables will be
- searched.
+ tables are included. There is no real reason to use this syntax any more,
+ because searching descendant tables is now always the default behavior.
+ However, it is supported for compatibility with older releases.
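With the GUC gone, the three spellings have fixed meanings; a small illustration using the cities example from ddl.sgml:

    SELECT name, altitude FROM cities;       -- parent plus descendant tables
    SELECT name, altitude FROM cities*;      -- the same, made explicit
    SELECT name, altitude FROM ONLY cities;  -- the parent table only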
diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c
index 9e62e00b8d..ba1414ba64 100644
--- a/src/backend/commands/lockcmds.c
+++ b/src/backend/commands/lockcmds.c
@@ -54,7 +54,7 @@ LockTableCommand(LockStmt *lockstmt)
foreach(p, lockstmt->relations)
{
RangeVar *rv = (RangeVar *) lfirst(p);
- bool recurse = interpretInhOption(rv->inhOpt);
+ bool recurse = (rv->inhOpt == INH_YES);
Oid reloid;
reloid = RangeVarGetRelidExtended(rv, lockstmt->mode, false,
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index ee79b726f2..13b12f1903 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1184,7 +1184,7 @@ ExecuteTruncate(TruncateStmt *stmt)
{
RangeVar *rv = lfirst(cell);
Relation rel;
- bool recurse = interpretInhOption(rv->inhOpt);
+ bool recurse = (rv->inhOpt == INH_YES);
Oid myrelid;
rel = heap_openrv(rv, AccessExclusiveLock);
@@ -2655,7 +2655,7 @@ renameatt(RenameStmt *stmt)
renameatt_internal(relid,
stmt->subname, /* old att name */
stmt->newname, /* new att name */
- interpretInhOption(stmt->relation->inhOpt), /* recursive? */
+ (stmt->relation->inhOpt == INH_YES), /* recursive? */
false, /* recursing? */
0, /* expected inhcount */
stmt->behavior);
@@ -2807,7 +2807,7 @@ RenameConstraint(RenameStmt *stmt)
rename_constraint_internal(relid, typid,
stmt->subname,
stmt->newname,
- stmt->relation ? interpretInhOption(stmt->relation->inhOpt) : false, /* recursive? */
+ (stmt->relation && stmt->relation->inhOpt == INH_YES), /* recursive? */
false, /* recursing? */
0 /* expected inhcount */ );
@@ -3050,7 +3050,7 @@ AlterTable(Oid relid, LOCKMODE lockmode, AlterTableStmt *stmt)
CheckTableNotInUse(rel, "ALTER TABLE");
ATController(stmt,
- rel, stmt->cmds, interpretInhOption(stmt->relation->inhOpt),
+ rel, stmt->cmds, (stmt->relation->inhOpt == INH_YES),
lockmode);
}
diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c
index 20e2dbda79..b64f7c6a85 100644
--- a/src/backend/nodes/makefuncs.c
+++ b/src/backend/nodes/makefuncs.c
@@ -423,7 +423,7 @@ makeRangeVar(char *schemaname, char *relname, int location)
r->catalogname = NULL;
r->schemaname = schemaname;
r->relname = relname;
- r->inhOpt = INH_DEFAULT;
+ r->inhOpt = INH_YES;
r->relpersistence = RELPERSISTENCE_PERMANENT;
r->alias = NULL;
r->location = location;
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 5e65fe75bd..601e22abfa 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -380,7 +380,7 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt)
/* set up range table with just the result rel */
qry->resultRelation = setTargetTable(pstate, stmt->relation,
- interpretInhOption(stmt->relation->inhOpt),
+ (stmt->relation->inhOpt == INH_YES),
true,
ACL_DELETE);
@@ -2177,7 +2177,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
}
qry->resultRelation = setTargetTable(pstate, stmt->relation,
- interpretInhOption(stmt->relation->inhOpt),
+ (stmt->relation->inhOpt == INH_YES),
true,
ACL_UPDATE);
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2ed7b5259d..931bc9aca6 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -28,12 +28,11 @@
* current transaction and are just parsing commands to find the next
* ROLLBACK or COMMIT. If you make use of SET variables, then you
* will do the wrong thing in multi-query strings like this:
- * SET SQL_inheritance TO off; SELECT * FROM foo;
+ * SET constraint_exclusion TO off; SELECT * FROM foo;
* because the entire string is parsed by gram.y before the SET gets
* executed. Anything that depends on the database or changeable state
* should be handled during parse analysis so that it happens at the
- * right time not the wrong time. The handling of SQL_inheritance is
- * a good example.
+ * right time, not the wrong time.
*
* WARNINGS
* If you use a list, make sure the datum is a node so that the printing
@@ -11249,9 +11248,9 @@ join_qual: USING '(' name_list ')' { $$ = (Node *) $3; }
relation_expr:
qualified_name
{
- /* default inheritance */
+ /* inheritance query, implicitly */
$$ = $1;
- $$->inhOpt = INH_DEFAULT;
+ $$->inhOpt = INH_YES;
$$->alias = NULL;
}
| qualified_name '*'
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 751de4bddb..a96b3f9280 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -228,30 +228,6 @@ setTargetTable(ParseState *pstate, RangeVar *relation,
return rtindex;
}
-/*
- * Simplify InhOption (yes/no/default) into boolean yes/no.
- *
- * The reason we do things this way is that we don't want to examine the
- * SQL_inheritance option flag until parse_analyze() is run. Otherwise,
- * we'd do the wrong thing with query strings that intermix SET commands
- * with queries.
- */
-bool
-interpretInhOption(InhOption inhOpt)
-{
- switch (inhOpt)
- {
- case INH_NO:
- return false;
- case INH_YES:
- return true;
- case INH_DEFAULT:
- return SQL_inheritance;
- }
- elog(ERROR, "bogus InhOption value: %d", inhOpt);
- return false; /* keep compiler quiet */
-}
-
/*
* Given a relation-options list (of DefElems), return true iff the specified
* table/result set should be created with OIDs. This needs to be done after
@@ -437,7 +413,7 @@ transformTableEntry(ParseState *pstate, RangeVar *r)
/* We need only build a range table entry */
rte = addRangeTableEntry(pstate, r, r->alias,
- interpretInhOption(r->inhOpt), true);
+ (r->inhOpt == INH_YES), true);
return rte;
}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a02511754e..946ba9e73e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -440,7 +440,6 @@ char *event_source;
bool row_security;
bool check_function_bodies = true;
bool default_with_oids = false;
-bool SQL_inheritance = true;
int log_min_error_statement = ERROR;
int log_min_messages = WARNING;
@@ -1321,15 +1320,6 @@ static struct config_bool ConfigureNamesBool[] =
false,
NULL, NULL, NULL
},
- {
- {"sql_inheritance", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
- gettext_noop("Causes subtables to be included by default in various commands."),
- NULL
- },
- &SQL_inheritance,
- true,
- NULL, NULL, NULL
- },
{
{"transform_null_equals", PGC_USERSET, COMPAT_OPTIONS_CLIENT,
gettext_noop("Treats \"expr=NULL\" as \"expr IS NULL\"."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 2c638b2c09..ee8232f2f4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -606,7 +606,6 @@
#lo_compat_privileges = off
#operator_precedence_warning = off
#quote_all_identifiers = off
-#sql_inheritance = on
#standard_conforming_strings = on
#synchronize_seqscans = on
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 65510b010b..d11f1120b0 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -45,8 +45,7 @@ typedef struct Alias
typedef enum InhOption
{
INH_NO, /* Do NOT scan child tables */
- INH_YES, /* DO scan child tables */
- INH_DEFAULT /* Use current SQL_inheritance option */
+ INH_YES /* DO scan child tables */
} InhOption;
/* What to do at commit time for temporary relations */
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index d04ce1125c..549bde43b6 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -19,7 +19,6 @@
extern void transformFromClause(ParseState *pstate, List *frmList);
extern int setTargetTable(ParseState *pstate, RangeVar *relation,
bool inh, bool alsoSource, AclMode requiredPerms);
-extern bool interpretInhOption(InhOption inhOpt);
extern bool interpretOidsOption(List *defList, bool allowOids);
extern Node *transformWhereClause(ParseState *pstate, Node *clause,
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 0bf9f21067..66a3915e8a 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -244,7 +244,6 @@ extern bool log_btree_build_stats;
extern PGDLLIMPORT bool check_function_bodies;
extern bool default_with_oids;
-extern bool SQL_inheritance;
extern int log_min_error_statement;
extern int log_min_messages;
--
cgit v1.2.3
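As a quick illustration of the sql_inheritance removal above (a hypothetical psql session, assuming a scratch database): inheritance traversal is now unconditional, and ONLY is the sole way to exclude child tables.

psql <<'SQL'
CREATE TABLE parent (x int);
CREATE TABLE child () INHERITS (parent);
INSERT INTO parent VALUES (1);
INSERT INTO child VALUES (2);
SELECT count(*) FROM parent;       -- 2: child rows are always included now
SELECT count(*) FROM ONLY parent;  -- 1: ONLY remains the way to exclude them
-- SET sql_inheritance = off;      -- would now fail: unrecognized parameter
SQL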
From ff33d1456ea098e160cbbc74b332656c06abc2ab Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 23 Dec 2016 11:53:35 -0500
Subject: Spellcheck: s/descendent/descendant/g
I got a little annoyed by reading documentation paragraphs containing
both spellings within a few lines of each other. My dictionary says
"descendant" is the preferred spelling, and it's certainly the majority
usage in our tree, so standardize on that.
For one usage in parallel.sgml, I thought it better to rewrite to avoid
the term altogether.
---
doc/src/sgml/parallel.sgml | 8 ++++----
doc/src/sgml/queries.sgml | 2 +-
doc/src/sgml/ref/alter_table.sgml | 8 ++++----
src/backend/executor/execParallel.c | 6 +++---
4 files changed, 12 insertions(+), 12 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/parallel.sgml b/doc/src/sgml/parallel.sgml
index bca4886b21..5d4bb211c1 100644
--- a/doc/src/sgml/parallel.sgml
+++ b/doc/src/sgml/parallel.sgml
@@ -47,8 +47,8 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
child plan, which is the portion of the plan that will be executed in
parallel. If the Gather> node is at the very top of the plan
tree, then the entire query will execute in parallel. If it is somewhere
- else in the plan tree, then only that portion of the query will run in
- parallel. In the example above, the query accesses only one table, so
+ else in the plan tree, then only the portion of the plan below it will run
+ in parallel. In the example above, the query accesses only one table, so
there is only one plan node other than the Gather> node itself;
since that plan node is a child of the Gather> node, it will
run in parallel.
@@ -76,8 +76,8 @@ EXPLAIN SELECT * FROM pgbench_accounts WHERE filler LIKE '%x%';
Every background worker process which is successfully started for a given
- parallel query will execute the portion of the plan which is a descendent
- of the Gather> node. The leader will also execute that portion
+ parallel query will execute the portion of the plan below
+ the Gather> node. The leader will also execute that portion
of the plan, but it has an additional responsibility: it must also read
all of the tuples generated by the workers. When the parallel portion of
the plan generates only a small number of tuples, the leader will often
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index 0f84c12bec..ef623d59bd 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -146,7 +146,7 @@ FROM table_reference, table_r
Instead of writing ONLY> before the table name, you can write
*> after the table name to explicitly specify that descendant
tables are included. There is no real reason to use this syntax any more,
- because searching descendent tables is now always the default behavior.
+ because searching descendant tables is now always the default behavior.
However, it is supported for compatibility with older releases.
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 44f1ec644f..da431f8369 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1116,11 +1116,11 @@ ALTER TABLE [ IF EXISTS ] name
rename, or change the type of a column in the parent table without doing
same to the descendants. This ensures that the descendants always have
columns matching the parent. Similarly, a constraint cannot be renamed
- in the parent without also renaming it in all descendents, so that
- constraints also match between the parent and its descendents.
- Also, because selecting from the parent also selects from its descendents,
+ in the parent without also renaming it in all descendants, so that
+ constraints also match between the parent and its descendants.
+ Also, because selecting from the parent also selects from its descendants,
a constraint on the parent cannot be marked valid unless it is also marked
- valid for those descendents. In all of these cases, ALTER TABLE
+ valid for those descendants. In all of these cases, ALTER TABLE
ONLY will be rejected.
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 8a6f844e35..cc42946f9c 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -518,7 +518,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, int nworkers)
}
/*
- * Copy instrumentation information about this node and its descendents from
+ * Copy instrumentation information about this node and its descendants from
* dynamic shared memory.
*/
static bool
@@ -666,7 +666,7 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver,
}
/*
- * Copy instrumentation information from this node and its descendents into
+ * Copy instrumentation information from this node and its descendants into
* dynamic shared memory, so that the parallel leader can retrieve it.
*/
static bool
@@ -706,7 +706,7 @@ ExecParallelReportInstrumentation(PlanState *planstate,
}
/*
- * Initialize the PlanState and its descendents with the information
+ * Initialize the PlanState and its descendants with the information
* retrieved from shared memory. This has to be done once the PlanState
* is allocated and initialized by executor; that is, after ExecutorStart().
*/
--
cgit v1.2.3
From 3c9d398484fb6e188e665be8299d6e5e89924c94 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 23 Dec 2016 12:53:09 -0500
Subject: Doc: improve index entry for "median".
We had an index entry for "median" attached to the percentile_cont function
entry, which was pretty useless because a person following the link would
never realize that that function was the one they were being hinted to use.
Instead, make the index entry point at the example in syntax-aggregates,
and add a link to "percentile".
Also, since that example explicitly claims to be calculating the median,
make it use percentile_cont not percentile_disc. This makes no difference
in terms of the larger goals of that section, but so far as I can find,
nearly everyone thinks that "median" means the continuous not discrete
calculation.
Per gripe from Steven Winfield. Back-patch to 9.4 where we introduced
percentile_cont.
Discussion: https://postgr.es/m/20161223102056.25614.1166@wrigleys.postgresql.org
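As an illustration of the distinction this change relies on (any database will do; the inline VALUES list is made up): with an even number of inputs there is no middle row, so the discrete and continuous aggregates diverge.

psql <<'SQL'
SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY v) AS disc,
       percentile_cont(0.5) WITHIN GROUP (ORDER BY v) AS cont
FROM (VALUES (10), (20), (30), (40)) AS t(v);
-- disc is 20, an actual input value; cont interpolates to 25,
-- which matches the usual definition of the median
SQL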
---
doc/src/sgml/func.sgml | 3 ---
doc/src/sgml/syntax.sgml | 14 ++++++++++++--
2 files changed, 12 insertions(+), 5 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 0f9c9bf129..10e31868ba 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -13774,9 +13774,6 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab;
percentilecontinuous
-
- median
- percentile_cont(fraction) WITHIN GROUP (ORDER BY sort_expression)
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index 000da39250..4ea667bd52 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -1694,11 +1694,21 @@ SELECT string_agg(a ORDER BY a, ',') FROM table; -- incorrect
case, write just ()> not (*)>.
(PostgreSQL> will actually accept either spelling, but
only the first way conforms to the SQL standard.)
+
+
+
+
+ median
+
+
+ median
+ percentile
+
An example of an ordered-set aggregate call is:
-SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY income) FROM households;
- percentile_disc
+SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY income) FROM households;
+ percentile_cont
-----------------
50489
--
cgit v1.2.3
From 71f996d22125eb6cfdbee6094f44370aa8dec610 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan
Date: Tue, 27 Dec 2016 11:23:46 -0500
Subject: Explain unaccounted for space in pgstattuple.
In addition to space accounted for by tuple_len, dead_tuple_len and
free_space, the table_len includes page overhead, the item pointers
table and padding bytes.
Backpatch to live branches.
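A rough way to see this accounting (sketch only; assumes the pgstattuple extension can be installed, and uses pg_proc merely as a convenient target):

psql <<'SQL'
CREATE EXTENSION IF NOT EXISTS pgstattuple;
SELECT table_len,
       tuple_len + dead_tuple_len + free_space AS accounted,
       table_len - (tuple_len + dead_tuple_len + free_space) AS overhead
FROM pgstattuple('pg_catalog.pg_proc');  -- overhead is always positive
SQL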
---
doc/src/sgml/pgstattuple.sgml | 10 ++++++++++
1 file changed, 10 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 9ada5d209a..d2fa524d6e 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -117,6 +117,16 @@ free_percent | 1.95
+
+
+ The table_len will always be greater than the sum
+ of the tuple_len, dead_tuple_len
+ and free_space. The difference is accounted for by
+ fixed page overhead, the per-page table of pointers to tuples, and
+ padding to ensure that tuples are correctly aligned.
+
+
+
pgstattuple acquires only a read lock on the
relation. So the results do not reflect an instantaneous snapshot;
--
cgit v1.2.3
From 67a875355e4a6db294e9652af5a883876ddeb4a5 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 2 Jan 2017 12:26:03 -0500
Subject: In pgbench logging, avoid assuming that instr_times match Unix
timestamps.
For aggregated logging, pgbench assumed that printing the integer part of
INSTR_TIME_GET_DOUBLE() would produce a Unix timestamp. That was already
broken on Windows, and it's about to get broken on most other platforms as
well. As in commit 74baa1e3b, we can remove the entanglement at the price
of one extra syscall per transaction; though here it seems more convenient
to use time(NULL) instead of gettimeofday(), since we only need
integral-second precision.
I took the time to do some wordsmithing on the documentation about
pgbench's logging features, too.
Discussion: https://postgr.es/m/8837.1483216839@sss.pgh.pa.us
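A sketch of how one might verify the fix (database name and durations are arbitrary): with aggregated logging, the first column of each log line should now be a plain Unix-epoch timestamp on every platform.

createdb bench && pgbench -i bench
pgbench -c 2 -T 10 -l --aggregate-interval=5 bench
head -n 2 pgbench_log.*   # e.g. "1345828501 5601 1542744 483552416 61 2573"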
---
doc/src/sgml/ref/pgbench.sgml | 96 ++++++++++++++++++++++++-------------------
src/bin/pgbench/pgbench.c | 62 ++++++++++++++--------------
2 files changed, 84 insertions(+), 74 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3a65729bf3..3fb29f8c1d 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -355,7 +355,7 @@ pgbench options> dbname>
--log
- Write the time taken by each transaction to a log file.
+ Write information about each transaction to a log file.
See below for details.
@@ -572,13 +572,9 @@ pgbench options> dbname>
--aggregate-interval=seconds>
- Length of aggregation interval (in seconds). May be used only together
- with -l - with this option, the log contains
- per-interval summary (number of transactions, min/max latency and two
- additional fields useful for variance estimation).
-
-
- This option is not currently supported on Windows.
+ Length of aggregation interval (in seconds). May be used only
+ with
-l
option. With this option, the log contains
+ per-interval summary data, as described below.
@@ -618,8 +614,8 @@ pgbench options> dbname>
--log-prefix=prefix>
- Set the filename prefix for the transaction log file created by
-
--log>. The default is pgbench_log>.
+ Set the filename prefix for the transaction log files created by
+
--log>. The default is pgbench_log>.
@@ -1128,38 +1124,44 @@ END;
Per-Transaction Logging
- With the
-l> option but without the
--aggregate-interval
,
- pgbench> writes the time taken by each transaction
+ With the
-l> option (but without
+ the
--aggregate-interval
option),
+ pgbench> writes information about each transaction
to a log file. The log file will be named
prefix>.nnn>,
where prefix> defaults to pgbench_log>, and
nnn> is the PID of the
- pgbench process. If the
-j> option is 2 or higher,
- creating multiple worker threads, each will have its own log file. The first worker will
+ pgbench process.
+ The prefix can be changed by using the
--log-prefix> option.
+ If the
-j> option is 2 or higher, so that there are multiple
+ worker threads, each will have its own log file. The first worker will
use the same name for its log file as in the standard single worker case.
The additional log files for the other workers will be named
- pgbench_log>.nnn>.mmm>,
+ prefix>.nnn>.mmm>,
where mmm> is a sequential number for each worker starting
- with 1. The prefix can be changed by using the
--log-prefix>
- option.
+ with 1.
The format of the log is:
-client_id> transaction_no> time> script_no> time_epoch> time_us> schedule_lag
+client_id> transaction_no> time> script_no> time_epoch> time_us> schedule_lag
- where time> is the total elapsed transaction time in microseconds,
+ where
+ client_id> indicates which client session ran the transaction,
+ transaction_no> counts how many transactions have been
+ run by that session,
+ time> is the total elapsed transaction time in microseconds,
script_no> identifies which script file was used (useful when
multiple scripts were specified with
-f> or
-b>),
and time_epoch>/time_us> are a
- Unix epoch format time stamp and an offset
+ Unix-epoch time stamp and an offset
in microseconds (suitable for creating an ISO 8601
time stamp with fractional seconds) showing when
the transaction completed.
- Field schedule_lag> is the difference between the
+ The schedule_lag> field is the difference between the
transaction's scheduled start time, and the time it actually started, in
microseconds. It is only present when the
--rate> option is used.
When both
--rate> and
--latency-limit> are used,
@@ -1168,7 +1170,7 @@ END;
- Here is a snippet of the log file generated:
+ Here is a snippet of a log file generated in a single-client run:
0 199 2241 0 1175850568 995598
0 200 2465 0 1175850568 998079
@@ -1176,7 +1178,8 @@ END;
0 202 2038 0 1175850569 2663
- Another example with --rate=100 and --latency-limit=5 (note the additional
+ Another example with --rate=100>
+ and --latency-limit=5> (note the additional
schedule_lag> column):
0 81 4621 0 1412881037 912698 3005
@@ -1203,32 +1206,41 @@ END;
Aggregated Logging
- With the
--aggregate-interval
option, the logs use a bit different format:
+ With the
--aggregate-interval
option, a different
+ format is used for the log files:
-interval_start> num_of_transactions> latency_sum> latency_2_sum> min_latency> max_latency> lag_sum> lag_2_sum> min_lag> max_lag> skipped_transactions>
+interval_start> num_transactions> sum_latency> sum_latency_2> min_latency> max_latency> sum_lag> sum_lag_2> min_lag> max_lag> skipped>
- where interval_start> is the start of the interval (Unix epoch
- format time stamp), num_of_transactions> is the number of transactions
- within the interval, latency_sum is a sum of latencies
- (so you can compute average latency easily). The following two fields are useful
- for variance estimation - latency_sum> is a sum of latencies and
- latency_2_sum> is a sum of 2nd powers of latencies. The next two
- fields are min_latency> - a minimum latency within the interval, and
- max_latency> - maximum latency within the interval. A transaction is
- counted into the interval when it was committed. The fields in the end,
- lag_sum>, lag_2_sum>, min_lag>,
+ where
+ interval_start> is the start of the interval (as a Unix
+ epoch time stamp),
+ num_transactions> is the number of transactions
+ within the interval,
+ sum_latency is the sum of the transaction
+ latencies within the interval,
+ sum_latency_2 is the sum of squares of the
+ transaction latencies within the interval,
+ min_latency> is the minimum latency within the interval,
+ and
+ max_latency> is the maximum latency within the interval.
+ The next fields,
+ sum_lag>, sum_lag_2>, min_lag>,
and max_lag>, are only present if the
--rate>
- option is used. The very last one, skipped_transactions>,
- is only present if the option
--latency-limit> is present, too.
- They are calculated from the time each transaction had to wait for the
+ option is used.
+ They provide statistics about the time each transaction had to wait for the
previous one to finish, i.e. the difference between each transaction's
scheduled start time and the time it actually started.
+ The very last field, skipped>,
+ is only present if the
--latency-limit> option is used, too.
+ It counts the number of transactions skipped because they would have
+ started too late.
+ Each transaction is counted in the interval when it was committed.
- Here is example output:
+ Here is some example output:
1345828501 5601 1542744 483552416 61 2573
1345828503 7884 1979812 565806736 60 1479
@@ -1238,9 +1250,9 @@ END;
- Notice that while the plain (unaggregated) log file contains a reference
- to the custom script files, the aggregated log does not. Therefore if
- you need per script data, you need to aggregate the data on your own.
+ Notice that while the plain (unaggregated) log file shows which script
+ was used for each transaction, the aggregated log does not. Therefore if
+ you need per-script data, you need to aggregate the data on your own.
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 1f05650c66..16a32e21f5 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -42,6 +42,7 @@
#include
#include
#include
+#include <time.h>
#include
#ifdef HAVE_SYS_SELECT_H
#include
@@ -227,7 +228,7 @@ typedef struct SimpleStats
*/
typedef struct StatsData
{
- long start_time; /* interval start time, for aggregates */
+ time_t start_time; /* interval start time, for aggregates */
int64 cnt; /* number of transactions */
int64 skipped; /* number of transactions skipped under --rate
* and --latency-limit */
@@ -449,7 +450,7 @@ static const BuiltinScript builtin_script[] =
static void setIntValue(PgBenchValue *pv, int64 ival);
static void setDoubleValue(PgBenchValue *pv, double dval);
static bool evaluateExpr(TState *, CState *, PgBenchExpr *, PgBenchValue *);
-static void doLog(TState *thread, CState *st, instr_time *now,
+static void doLog(TState *thread, CState *st,
StatsData *agg, bool skipped, double latency, double lag);
static void processXactStats(TState *thread, CState *st, instr_time *now,
bool skipped, StatsData *agg);
@@ -780,7 +781,7 @@ mergeSimpleStats(SimpleStats *acc, SimpleStats *ss)
* the given value.
*/
static void
-initStats(StatsData *sd, double start_time)
+initStats(StatsData *sd, time_t start_time)
{
sd->start_time = start_time;
sd->cnt = 0;
@@ -2425,10 +2426,15 @@ doCustom(TState *thread, CState *st, StatsData *agg)
}
/*
- * print log entry after completing one transaction.
+ * Print log entry after completing one transaction.
+ *
+ * We print Unix-epoch timestamps in the log, so that entries can be
+ * correlated against other logs. On some platforms this could be obtained
+ * from the instr_time reading the caller has, but rather than get entangled
+ * with that, we just eat the cost of an extra syscall in all cases.
*/
static void
-doLog(TState *thread, CState *st, instr_time *now,
+doLog(TState *thread, CState *st,
StatsData *agg, bool skipped, double latency, double lag)
{
FILE *logfile = thread->logfile;
@@ -2447,15 +2453,17 @@ doLog(TState *thread, CState *st, instr_time *now,
if (agg_interval > 0)
{
/*
- * Loop until we reach the interval of the current transaction, and
- * print all the empty intervals in between (this may happen with very
- * low tps, e.g. --rate=0.1).
+ * Loop until we reach the interval of the current moment, and print
+ * any empty intervals in between (this may happen with very low tps,
+ * e.g. --rate=0.1).
*/
- while (agg->start_time + agg_interval < INSTR_TIME_GET_DOUBLE(*now))
+ time_t now = time(NULL);
+
+ while (agg->start_time + agg_interval <= now)
{
/* print aggregated report to logfile */
fprintf(logfile, "%ld " INT64_FORMAT " %.0f %.0f %.0f %.0f",
- agg->start_time,
+ (long) agg->start_time,
agg->cnt,
agg->latency.sum,
agg->latency.sum2,
@@ -2485,12 +2493,6 @@ doLog(TState *thread, CState *st, instr_time *now,
/* no, print raw transactions */
struct timeval tv;
- /*
- * We print the current system timestamp in the log, so that entries
- * can be correlated against other logs. On some platforms this is
- * available in *now, but rather than get entangled with that, we just
- * eat the cost of an extra syscall in all cases.
- */
gettimeofday(&tv, NULL);
if (skipped)
fprintf(logfile, "%d " INT64_FORMAT " skipped %d %ld %ld",
@@ -2518,7 +2520,7 @@ processXactStats(TState *thread, CState *st, instr_time *now,
double latency = 0.0,
lag = 0.0;
- if ((!skipped || agg_interval) && INSTR_TIME_IS_ZERO(*now))
+ if ((!skipped) && INSTR_TIME_IS_ZERO(*now))
INSTR_TIME_SET_CURRENT(*now);
if (!skipped)
@@ -2540,7 +2542,7 @@ processXactStats(TState *thread, CState *st, instr_time *now,
thread->stats.cnt++;
if (use_log)
- doLog(thread, st, now, agg, skipped, latency, lag);
+ doLog(thread, st, agg, skipped, latency, lag);
/* XXX could use a mutex here, but we choose not to */
if (per_script_stats)
@@ -3202,7 +3204,7 @@ ParseScript(const char *script, const char *desc, int weight)
ps.desc = desc;
ps.weight = weight;
ps.commands = (Command **) pg_malloc(sizeof(Command *) * alloc_num);
- initStats(&ps.stats, 0.0);
+ initStats(&ps.stats, 0);
/* Prepare to parse script */
sstate = psql_scan_create(&pgbench_callbacks);
@@ -3972,10 +3974,6 @@ main(int argc, char **argv)
}
break;
case 5:
-#ifdef WIN32
- fprintf(stderr, "--aggregate-interval is not currently supported on Windows\n");
- exit(1);
-#else
benchmarking_option_set = true;
agg_interval = atoi(optarg);
if (agg_interval <= 0)
@@ -3984,7 +3982,6 @@ main(int argc, char **argv)
optarg);
exit(1);
}
-#endif
break;
case 6:
progress_timestamp = true;
@@ -4267,7 +4264,7 @@ main(int argc, char **argv)
thread->random_state[2] = random();
thread->logfile = NULL; /* filled in later */
thread->latency_late = 0;
- initStats(&thread->stats, 0.0);
+ initStats(&thread->stats, 0);
nclients_dealt += thread->nstate;
}
@@ -4321,7 +4318,7 @@ main(int argc, char **argv)
#endif /* ENABLE_THREAD_SAFETY */
/* wait for threads and accumulate results */
- initStats(&stats, 0.0);
+ initStats(&stats, 0);
INSTR_TIME_SET_ZERO(conn_total_time);
for (i = 0; i < nthreads; i++)
{
@@ -4394,6 +4391,9 @@ threadRun(void *arg)
INSTR_TIME_SET_ZERO(thread->conn_time);
+ initStats(&aggs, time(NULL));
+ last = aggs;
+
/* open log file if requested */
if (use_log)
{
@@ -4429,9 +4429,6 @@ threadRun(void *arg)
INSTR_TIME_SET_CURRENT(thread->conn_time);
INSTR_TIME_SUBTRACT(thread->conn_time, thread->start_time);
- initStats(&aggs, INSTR_TIME_GET_DOUBLE(thread->start_time));
- last = aggs;
-
/* explicitly initialize the state machines */
for (i = 0; i < nstate; i++)
{
@@ -4635,7 +4632,7 @@ threadRun(void *arg)
* (If a read from a 64-bit integer is not atomic, you might
* get a "torn" read and completely bogus latencies though!)
*/
- initStats(&cur, 0.0);
+ initStats(&cur, 0);
for (i = 0; i < nthreads; i++)
{
mergeSimpleStats(&cur.latency, &thread[i].stats.latency);
@@ -4695,12 +4692,13 @@ done:
INSTR_TIME_ACCUM_DIFF(thread->conn_time, end, start);
if (thread->logfile)
{
- if (agg_interval)
+ if (agg_interval > 0)
{
/* log aggregated but not yet reported transactions */
- doLog(thread, state, &end, &aggs, false, 0, 0);
+ doLog(thread, state, &aggs, false, 0, 0);
}
fclose(thread->logfile);
+ thread->logfile = NULL;
}
return NULL;
}
--
cgit v1.2.3
From de41869b64d57160f58852eab20a27f248188135 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 2 Jan 2017 21:37:12 -0500
Subject: Allow SSL configuration to be updated at SIGHUP.
It is no longer necessary to restart the server to enable, disable,
or reconfigure SSL. Instead, we just create a new SSL_CTX struct
(by re-reading all relevant files) whenever we get SIGHUP. Testing
shows that this is fast enough that it shouldn't be a problem.
In conjunction with that, downgrade the logic that complains about
pg_hba.conf "hostssl" lines when SSL isn't active: now that's just
a warning condition not an error.
An issue that still needs to be addressed is what to do with
passphrase-protected server keys. As this stands, the server would
demand the passphrase again on every SIGHUP, which is certainly
impractical. But the case was only barely supported before, so that
does not seem a sufficient reason to hold up committing this patch.
Andreas Karlsson, reviewed by Michael Banck and Michael Paquier
Discussion: https://postgr.es/m/556A6E8A.9030400@proxel.se
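A sketch of the workflow this enables (file paths are illustrative; the log message quoted below is the one added by this patch):

cp /path/to/new/server.crt /path/to/new/server.key "$PGDATA"
chmod 600 "$PGDATA/server.key"
pg_ctl reload -D "$PGDATA"   # plain SIGHUP; a server restart is no longer needed
# If the new files are bad, the server logs "SSL configuration was not
# reloaded" and keeps the old SSL state.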
---
doc/src/sgml/client-auth.sgml | 4 +-
doc/src/sgml/config.sgml | 84 ++++---
doc/src/sgml/runtime.sgml | 19 +-
src/backend/libpq/auth.c | 22 +-
src/backend/libpq/be-secure-openssl.c | 315 +++++++++++++++++---------
src/backend/libpq/be-secure.c | 23 +-
src/backend/libpq/hba.c | 29 +--
src/backend/postmaster/postmaster.c | 45 +++-
src/backend/utils/misc/guc.c | 18 +-
src/backend/utils/misc/postgresql.conf.sample | 15 +-
src/include/libpq/libpq-be.h | 3 +-
src/include/libpq/libpq.h | 2 +-
src/test/ssl/ServerSetup.pm | 58 +++--
13 files changed, 400 insertions(+), 237 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml
index 960f5b5871..dda5891900 100644
--- a/doc/src/sgml/client-auth.sgml
+++ b/doc/src/sgml/client-auth.sgml
@@ -156,9 +156,11 @@ hostnossl databaseuser
To make use of this option the server must be built with
SSL support. Furthermore,
- SSL must be enabled at server start time
+ SSL must be enabled
by setting the configuration parameter (see
for more information).
+ Otherwise, the hostssl record is ignored except for
+ logging a warning that it cannot match any connections.
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 8d7b3bfd66..30dd54cd5d 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -958,10 +958,10 @@ include_dir 'conf.d'
Enables SSL> connections. Please read
- before using this. The default
- is off>. This parameter can only be set at server
- start. SSL> communication is only possible with
- TCP/IP connections.
+ before using this.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is off>.
@@ -975,11 +975,16 @@ include_dir 'conf.d'
Specifies the name of the file containing the SSL server certificate
- authority (CA). The default is empty, meaning no CA file is loaded,
- and client certificate verification is not performed. (In previous
- releases of PostgreSQL, the name of this file was hard-coded
- as root.crt.) Relative paths are relative to the
- data directory. This parameter can only be set at server start.
+ authority (CA).
+ Relative paths are relative to the data directory.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is empty, meaning no CA file is loaded,
+ and client certificate verification is not performed.
+
+
+ In previous releases of PostgreSQL, the name of this file was
+ hard-coded as root.crt.
@@ -993,9 +998,10 @@ include_dir 'conf.d'
Specifies the name of the file containing the SSL server certificate.
- The default is server.crt. Relative paths are
- relative to the data directory. This parameter can only be set at
- server start.
+ Relative paths are relative to the data directory.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is server.crt.
@@ -1009,11 +1015,15 @@ include_dir 'conf.d'
Specifies the name of the file containing the SSL server certificate
- revocation list (CRL). The default is empty, meaning no CRL file is
- loaded. (In previous releases of PostgreSQL, the name of this file was
- hard-coded as root.crl.) Relative paths are
- relative to the data directory. This parameter can only be set at
- server start.
+ revocation list (CRL).
+ Relative paths are relative to the data directory.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is empty, meaning no CRL file is loaded.
+
+
+ In previous releases of PostgreSQL, the name of this file was
+ hard-coded as root.crl.
@@ -1027,9 +1037,10 @@ include_dir 'conf.d'
Specifies the name of the file containing the SSL server private key.
- The default is server.key. Relative paths are
- relative to the data directory. This parameter can only be set at
- server start.
+ Relative paths are relative to the data directory.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is server.key.
@@ -1046,10 +1057,12 @@ include_dir 'conf.d'
used on secure connections. See
the ciphers> manual page
in the OpenSSL> package for the syntax of this setting
- and a list of supported values. The default value is
- HIGH:MEDIUM:+3DES:!aNULL>. It is usually reasonable,
- unless you have specific security requirements. This parameter can only
- be set at server start.
+ and a list of supported values.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default value is HIGH:MEDIUM:+3DES:!aNULL>. The
+ default is usually a reasonable choice unless you have specific
+ security requirements.
@@ -1113,7 +1126,7 @@ include_dir 'conf.d'
- ssl_prefer_server_ciphers (bool)
+ ssl_prefer_server_ciphers (boolean)
ssl_prefer_server_ciphers> configuration parameter
@@ -1121,8 +1134,10 @@ include_dir 'conf.d'
Specifies whether to use the server's SSL cipher preferences, rather
- than the client's. The default is true. This parameter can only be
- set at server start.
+ than the client's.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is true>.
@@ -1145,19 +1160,18 @@ include_dir 'conf.d'
Specifies the name of the curve to use in ECDH> key
exchange. It needs to be supported by all clients that connect.
- It does not need to be same curve as used by server's Elliptic
- Curve key. The default is prime256v1>. This parameter
- can only be set at server start.
+ It does not need to be the same curve used by the server's Elliptic
+ Curve key.
+ This parameter can only be set in the postgresql.conf>
+ file or on the server command line.
+ The default is prime256v1>.
- OpenSSL names for most common curves:
+ OpenSSL names for the most common curves are:
prime256v1> (NIST P-256),
secp384r1> (NIST P-384),
secp521r1> (NIST P-521).
-
-
-
The full list of available curves can be shown with the command
openssl ecparam -list_curves. Not all of them
are usable in TLS> though.
@@ -3003,7 +3017,7 @@ include_dir 'conf.d'
- track_commit_timestamp (bool)
+ track_commit_timestamp (boolean)
track_commit_timestamp> configuration parameter
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 787cfce987..65c7809332 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -2285,11 +2285,20 @@ pg_dumpall -p 5432 | psql -d postgres -p 5433
-
-
- Using this option is equivalent of using -X with
- method fetch.
-
-
-
-
-X method
--xlog-method=method
@@ -302,16 +289,26 @@ PostgreSQL documentation
Includes the required transaction log files (WAL files) in the
backup. This will include all transaction logs generated during
- the backup. If this option is specified, it is possible to start
- a postmaster directly in the extracted directory without the need
- to consult the log archive, thus making this a completely standalone
- backup.
+ the backup. Unless the method none is specified,
+ it is possible to start a postmaster directly in the extracted
+ directory without the need to consult the log archive, thus
+ making this a completely standalone backup.
The following methods for collecting the transaction logs are
supported:
+
+ n
+ none
+
+
+ Don't include transaction log in the backup.
+
+
+
+
ffetch
@@ -349,6 +346,9 @@ PostgreSQL documentation
named pg_wal.tar (if the server is a version
earlier than 10, the file will be named pg_xlog.tar).
+
+ This value is the default.
+
@@ -699,7 +699,7 @@ PostgreSQL documentation
To create a backup of a single-tablespace local database and compress
this with bzip2:
-$pg_basebackup -D - -Ft | bzip2 > backup.tar.bz2
+$pg_basebackup -D - -Ft -X fetch | bzip2 > backup.tar.bz2
(This command will fail if there are multiple tablespaces in the
database.)
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 6c5c508e39..3f83d87e50 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -71,8 +71,8 @@ static bool noclean = false;
static bool showprogress = false;
static int verbose = 0;
static int compresslevel = 0;
-static bool includewal = false;
-static bool streamwal = false;
+static bool includewal = true;
+static bool streamwal = true;
static bool fastcheckpoint = false;
static bool writerecoveryconf = false;
static bool do_sync = true;
@@ -325,8 +325,7 @@ usage(void)
printf(_(" -S, --slot=SLOTNAME replication slot to use\n"));
printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n"
" relocate tablespace in OLDDIR to NEWDIR\n"));
- printf(_(" -x, --xlog include required WAL files in backup (fetch mode)\n"));
- printf(_(" -X, --xlog-method=fetch|stream\n"
+ printf(_(" -X, --xlog-method=none|fetch|stream\n"
" include required WAL files with specified method\n"));
printf(_(" --xlogdir=XLOGDIR location for the transaction log directory\n"));
printf(_(" -z, --gzip compress tar output\n"));
@@ -1700,7 +1699,11 @@ BaseBackup(void)
*/
if (streamwal && !CheckServerVersionForStreaming(conn))
{
- /* Error message already written in CheckServerVersionForStreaming() */
+ /*
+ * Error message already written in CheckServerVersionForStreaming(),
+ * but add a hint about using -X none.
+ */
+ fprintf(stderr, _("HINT: use -X none or -X fetch to disable log streaming\n"));
disconnect_and_exit(1);
}
@@ -2035,7 +2038,6 @@ main(int argc, char **argv)
{"write-recovery-conf", no_argument, NULL, 'R'},
{"slot", required_argument, NULL, 'S'},
{"tablespace-mapping", required_argument, NULL, 'T'},
- {"xlog", no_argument, NULL, 'x'},
{"xlog-method", required_argument, NULL, 'X'},
{"gzip", no_argument, NULL, 'z'},
{"compress", required_argument, NULL, 'Z'},
@@ -2078,7 +2080,7 @@ main(int argc, char **argv)
atexit(cleanup_directories_atexit);
- while ((c = getopt_long(argc, argv, "D:F:r:RT:xX:l:nNzZ:d:c:h:p:U:s:S:wWvP",
+ while ((c = getopt_long(argc, argv, "D:F:r:RT:X:l:nNzZ:d:c:h:p:U:s:S:wWvP",
long_options, &option_index)) != -1)
{
switch (c)
@@ -2111,38 +2113,29 @@ main(int argc, char **argv)
case 'T':
tablespace_list_append(optarg);
break;
- case 'x':
- if (includewal)
- {
- fprintf(stderr,
- _("%s: cannot specify both --xlog and --xlog-method\n"),
- progname);
- exit(1);
- }
-
- includewal = true;
- streamwal = false;
- break;
case 'X':
- if (includewal)
+ if (strcmp(optarg, "n") == 0 ||
+ strcmp(optarg, "none") == 0)
{
- fprintf(stderr,
- _("%s: cannot specify both --xlog and --xlog-method\n"),
- progname);
- exit(1);
+ includewal = false;
+ streamwal = false;
}
-
- includewal = true;
- if (strcmp(optarg, "f") == 0 ||
+ else if (strcmp(optarg, "f") == 0 ||
strcmp(optarg, "fetch") == 0)
+ {
+ includewal = true;
streamwal = false;
+ }
else if (strcmp(optarg, "s") == 0 ||
strcmp(optarg, "stream") == 0)
+ {
+ includewal = true;
streamwal = true;
+ }
else
{
fprintf(stderr,
- _("%s: invalid xlog-method option \"%s\", must be \"fetch\" or \"stream\"\n"),
+ _("%s: invalid xlog-method option \"%s\", must be \"fetch\", \"stream\" or \"none\"\n"),
progname, optarg);
exit(1);
}
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index 7811093100..4c6670ce72 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
-use Test::More tests => 69;
+use Test::More tests => 71;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@@ -63,7 +63,7 @@ foreach my $filename (qw(backup_label tablespace_map postgresql.auto.conf.tmp))
close FILE;
}
-$node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup" ],
+$node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backup", '-X', 'none' ],
'pg_basebackup runs');
ok(-f "$tempdir/backup/PG_VERSION", 'backup was created');
@@ -225,6 +225,11 @@ like(
qr/^primary_conninfo = '.*port=$port.*'\n/m,
'recovery.conf sets primary_conninfo');
+$node->command_ok([ 'pg_basebackup', '-D', "$tempdir/backupxd" ],
+ 'pg_basebackup runs in default xlog mode');
+ok(grep(/^[0-9A-F]{24}$/, slurp_dir("$tempdir/backupxd/pg_wal")),
+ 'WAL files copied');
+
$node->command_ok(
[ 'pg_basebackup', '-D', "$tempdir/backupxf", '-X', 'fetch' ],
'pg_basebackup -X fetch runs');
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index c1b16ca9e9..f3c38bce0f 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -484,7 +484,7 @@ sub backup
print "# Taking pg_basebackup $backup_name from node \"$name\"\n";
TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-p', $port,
- '-x', '--no-sync');
+ '--no-sync');
print "# Backup finished\n";
}
--
cgit v1.2.3
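The pg_basebackup hunks above change the default WAL handling; a sketch of the resulting command-line behavior (target directories are placeholders):

pg_basebackup -D /tmp/bk_default           # WAL is now streamed by default
pg_basebackup -D /tmp/bk_fetch -X fetch    # collect WAL at the end of the backup
pg_basebackup -D /tmp/bk_none  -X none     # no WAL; restoring needs a log archive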
From 6667d9a6d77b9a6eac89638ac363b6d03da253c1 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 4 Jan 2017 12:43:52 -0500
Subject: Re-allow SSL passphrase prompt at server start, but not thereafter.
Leave OpenSSL's default passphrase collection callback in place during
the first call of secure_initialize() in server startup. Although that
doesn't work terribly well in daemon contexts, some people feel we should
not break it for anyone who was successfully using it before. We still
block passphrase demands during SIGHUP, meaning that you can't adjust SSL
configuration on-the-fly if you used a passphrase, but this is no worse
than what it was before commit de41869b6. And we block passphrase demands
during EXEC_BACKEND reloads; that behavior wasn't useful either, but at
least now it's documented.
Tweak some related log messages for better readability, and avoid issuing
essentially duplicate messages about reload failure caused by a passphrase.
Discussion: https://postgr.es/m/29982.1483412575@sss.pgh.pa.us
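A sketch of the resulting behavior (key file names follow the documentation's examples; openssl prompts interactively for the passphrase):

openssl rsa -aes256 -in server.key -out server.key.new   # add a passphrase
mv server.key.new server.key && chmod 600 server.key
pg_ctl start -D "$PGDATA"    # prompts for the passphrase once, at startup
pg_ctl reload -D "$PGDATA"   # on SIGHUP the key cannot be reloaded, so the
                             # old SSL configuration stays in effect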
---
doc/src/sgml/runtime.sgml | 19 +++++---
src/backend/libpq/be-secure-openssl.c | 84 ++++++++++++++++++++---------------
src/backend/libpq/be-secure.c | 10 ++---
src/backend/postmaster/postmaster.c | 8 ++--
src/include/libpq/libpq-be.h | 2 +-
src/include/libpq/libpq.h | 2 +-
6 files changed, 72 insertions(+), 53 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index 38f561886a..130c386462 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -2159,8 +2159,13 @@ pg_dumpall -p 5432 | psql -d postgres -p 5433
- The private key cannot be protected with a passphrase, as there is no
- way to supply the passphrase to the server.
+ If the private key is protected with a passphrase, the
+ server will prompt for the passphrase and will not start until it has
+ been entered.
+ Using a passphrase also disables the ability to change the server's SSL
+ configuration without a server restart.
+ Furthermore, passphrase-protected private keys cannot be used at all
+ on Windows.
@@ -2293,9 +2298,9 @@ pg_dumpall -p 5432 | psql -d postgres -p 5433
If an error in these files is detected at server start, the server will
refuse to start. But if an error is detected during a configuration
- reload, the files are ignored and the old values continue to be used.
- On Windows> systems, if an error in these
- files is detected at backend start, that backend will be unable to
+ reload, the files are ignored and the old SSL configuration continues to
+ be used. On Windows> systems, if an error in
+ these files is detected at backend start, that backend will be unable to
establish an SSL connection. In all these cases, the error condition is
reported in the server log.
@@ -2314,8 +2319,8 @@ openssl req -new -text -out server.req
you enter the local host name as Common Name>; the challenge
password can be left blank. The program will generate a key that is
passphrase protected; it will not accept a passphrase that is less
- than four characters long. To remove the passphrase again (as you must),
- next run the commands:
+ than four characters long. To remove the passphrase again (as you must
+ if you want automatic start-up of the server), next run the commands:
openssl rsa -in privkey.pem -out server.key
rm privkey.pem
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 07341ff696..44c84a7869 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -78,13 +78,14 @@ static DH *tmp_dh_cb(SSL *s, int is_export, int keylength);
static int ssl_passwd_cb(char *buf, int size, int rwflag, void *userdata);
static int verify_cb(int, X509_STORE_CTX *);
static void info_cb(const SSL *ssl, int type, int args);
-static bool initialize_ecdh(SSL_CTX *context, bool failOnError);
+static bool initialize_ecdh(SSL_CTX *context, bool isServerStart);
static const char *SSLerrmessage(unsigned long ecode);
static char *X509_NAME_to_cstring(X509_NAME *name);
static SSL_CTX *SSL_context = NULL;
static bool SSL_initialized = false;
+static bool ssl_passwd_cb_called = false;
/* ------------------------------------------------------------ */
/* Hardcoded values */
@@ -159,12 +160,12 @@ KWbuHn491xNO25CQWMtem80uKw+pTnisBRF/454n1Jnhub144YRBoN8CAQI=\n\
/*
* Initialize global SSL context.
*
- * If failOnError is true, report any errors as FATAL (so we don't return).
- * Otherwise, log errors at LOG level and return -1 to indicate trouble.
- * Returns 0 if OK.
+ * If isServerStart is true, report any errors as FATAL (so we don't return).
+ * Otherwise, log errors at LOG level and return -1 to indicate trouble,
+ * preserving the old SSL state if any. Returns 0 if OK.
*/
int
-be_tls_init(bool failOnError)
+be_tls_init(bool isServerStart)
{
STACK_OF(X509_NAME) *root_cert_list = NULL;
SSL_CTX *context;
@@ -192,7 +193,7 @@ be_tls_init(bool failOnError)
context = SSL_CTX_new(SSLv23_method());
if (!context)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errmsg("could not create SSL context: %s",
SSLerrmessage(ERR_get_error()))));
goto error;
@@ -205,16 +206,21 @@ be_tls_init(bool failOnError)
SSL_CTX_set_mode(context, SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
/*
- * Override OpenSSL's default handling of passphrase-protected files.
+ * If reloading, override OpenSSL's default handling of
+ * passphrase-protected files, because we don't want to prompt for a
+ * passphrase in an already-running server. (Not that the default
+ * handling is very desirable during server start either, but some people
+ * insist we need to keep it.)
*/
- SSL_CTX_set_default_passwd_cb(context, ssl_passwd_cb);
+ if (!isServerStart)
+ SSL_CTX_set_default_passwd_cb(context, ssl_passwd_cb);
/*
* Load and verify server's certificate and private key
*/
if (SSL_CTX_use_certificate_chain_file(context, ssl_cert_file) != 1)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not load server certificate file \"%s\": %s",
ssl_cert_file, SSLerrmessage(ERR_get_error()))));
@@ -223,7 +229,7 @@ be_tls_init(bool failOnError)
if (stat(ssl_key_file, &buf) != 0)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode_for_file_access(),
errmsg("could not access private key file \"%s\": %m",
ssl_key_file)));
@@ -232,7 +238,7 @@ be_tls_init(bool failOnError)
if (!S_ISREG(buf.st_mode))
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("private key file \"%s\" is not a regular file",
ssl_key_file)));
@@ -240,14 +246,14 @@ be_tls_init(bool failOnError)
}
/*
- * Refuse to load files owned by users other than us or root.
+ * Refuse to load key files owned by users other than us or root.
*
* XXX surely we can check this on Windows somehow, too.
*/
#if !defined(WIN32) && !defined(__CYGWIN__)
if (buf.st_uid != geteuid() && buf.st_uid != 0)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("private key file \"%s\" must be owned by the database user or root",
ssl_key_file)));
@@ -270,7 +276,7 @@ be_tls_init(bool failOnError)
if ((buf.st_uid == geteuid() && buf.st_mode & (S_IRWXG | S_IRWXO)) ||
(buf.st_uid == 0 && buf.st_mode & (S_IWGRP | S_IXGRP | S_IRWXO)))
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("private key file \"%s\" has group or world access",
ssl_key_file),
@@ -279,20 +285,31 @@ be_tls_init(bool failOnError)
}
#endif
+ /*
+ * OK, try to load the private key file.
+ */
+ ssl_passwd_cb_called = false;
+
if (SSL_CTX_use_PrivateKey_file(context,
ssl_key_file,
SSL_FILETYPE_PEM) != 1)
{
- ereport(failOnError ? FATAL : LOG,
- (errcode(ERRCODE_CONFIG_FILE_ERROR),
- errmsg("could not load private key file \"%s\": %s",
- ssl_key_file, SSLerrmessage(ERR_get_error()))));
+ if (ssl_passwd_cb_called)
+ ereport(isServerStart ? FATAL : LOG,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("private key file \"%s\" cannot be reloaded because it requires a passphrase",
+ ssl_key_file)));
+ else
+ ereport(isServerStart ? FATAL : LOG,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("could not load private key file \"%s\": %s",
+ ssl_key_file, SSLerrmessage(ERR_get_error()))));
goto error;
}
if (SSL_CTX_check_private_key(context) != 1)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("check of private key failed: %s",
SSLerrmessage(ERR_get_error()))));
@@ -306,13 +323,13 @@ be_tls_init(bool failOnError)
SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
/* set up ephemeral ECDH keys */
- if (!initialize_ecdh(context, failOnError))
+ if (!initialize_ecdh(context, isServerStart))
goto error;
/* set up the allowed cipher list */
if (SSL_CTX_set_cipher_list(context, SSLCipherSuites) != 1)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not set the cipher list (no valid ciphers available)")));
goto error;
@@ -330,7 +347,7 @@ be_tls_init(bool failOnError)
if (SSL_CTX_load_verify_locations(context, ssl_ca_file, NULL) != 1 ||
(root_cert_list = SSL_load_client_CA_file(ssl_ca_file)) == NULL)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not load root certificate file \"%s\": %s",
ssl_ca_file, SSLerrmessage(ERR_get_error()))));
@@ -366,7 +383,7 @@ be_tls_init(bool failOnError)
}
else
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not load SSL certificate revocation list file \"%s\": %s",
ssl_crl_file, SSLerrmessage(ERR_get_error()))));
@@ -1071,19 +1088,16 @@ tmp_dh_cb(SSL *s, int is_export, int keylength)
*
* If OpenSSL is told to use a passphrase-protected server key, by default
* it will issue a prompt on /dev/tty and try to read a key from there.
- * That's completely no good for a postmaster SIGHUP cycle, not to mention
- * SSL context reload in an EXEC_BACKEND postmaster child. So override it
- * with this dummy function that just returns an empty passphrase,
- * guaranteeing failure. Later we might think about collecting a passphrase
- * at server start and feeding it to OpenSSL repeatedly, but we'd still
- * need this callback for that.
+ * That's no good during a postmaster SIGHUP cycle, not to mention SSL context
+ * reload in an EXEC_BACKEND postmaster child. So override it with this dummy
+ * function that just returns an empty passphrase, guaranteeing failure.
*/
static int
ssl_passwd_cb(char *buf, int size, int rwflag, void *userdata)
{
- ereport(LOG,
- (errcode(ERRCODE_CONFIG_FILE_ERROR),
- errmsg("server's private key file requires a passphrase")));
+ /* Set flag to change the error message we'll report */
+ ssl_passwd_cb_called = true;
+ /* And return empty string */
Assert(size > 0);
buf[0] = '\0';
return 0;
@@ -1151,7 +1165,7 @@ info_cb(const SSL *ssl, int type, int args)
}
static bool
-initialize_ecdh(SSL_CTX *context, bool failOnError)
+initialize_ecdh(SSL_CTX *context, bool isServerStart)
{
#ifndef OPENSSL_NO_ECDH
EC_KEY *ecdh;
@@ -1160,7 +1174,7 @@ initialize_ecdh(SSL_CTX *context, bool failOnError)
nid = OBJ_sn2nid(SSLECDHCurve);
if (!nid)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("ECDH: unrecognized curve name: %s", SSLECDHCurve)));
return false;
@@ -1169,7 +1183,7 @@ initialize_ecdh(SSL_CTX *context, bool failOnError)
ecdh = EC_KEY_new_by_curve_name(nid);
if (!ecdh)
{
- ereport(failOnError ? FATAL : LOG,
+ ereport(isServerStart ? FATAL : LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("ECDH: could not create key")));
return false;
diff --git a/src/backend/libpq/be-secure.c b/src/backend/libpq/be-secure.c
index e5ee974c27..785dadb6c2 100644
--- a/src/backend/libpq/be-secure.c
+++ b/src/backend/libpq/be-secure.c
@@ -65,15 +65,15 @@ bool SSLPreferServerCiphers;
/*
* Initialize global context.
*
- * If failOnError is true, report any errors as FATAL (so we don't return).
- * Otherwise, log errors at LOG level and return -1 to indicate trouble.
- * Returns 0 if OK.
+ * If isServerStart is true, report any errors as FATAL (so we don't return).
+ * Otherwise, log errors at LOG level and return -1 to indicate trouble,
+ * preserving the old SSL state if any. Returns 0 if OK.
*/
int
-secure_initialize(bool failOnError)
+secure_initialize(bool isServerStart)
{
#ifdef USE_SSL
- return be_tls_init(failOnError);
+ return be_tls_init(isServerStart);
#else
return 0;
#endif
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 21066e7bb7..5be30b0ee1 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2507,11 +2507,11 @@ SIGHUP_handler(SIGNAL_ARGS)
/* Reload authentication config files too */
if (!load_hba())
ereport(LOG,
- (errmsg("pg_hba.conf not reloaded")));
+ (errmsg("pg_hba.conf was not reloaded")));
if (!load_ident())
ereport(LOG,
- (errmsg("pg_ident.conf not reloaded")));
+ (errmsg("pg_ident.conf was not reloaded")));
#ifdef USE_SSL
/* Reload SSL configuration as well */
@@ -2521,7 +2521,7 @@ SIGHUP_handler(SIGNAL_ARGS)
LoadedSSL = true;
else
ereport(LOG,
- (errmsg("SSL context not reloaded")));
+ (errmsg("SSL configuration was not reloaded")));
}
else
{
@@ -4772,7 +4772,7 @@ SubPostmasterMain(int argc, char *argv[])
LoadedSSL = true;
else
ereport(LOG,
- (errmsg("SSL context could not be reloaded in child process")));
+ (errmsg("SSL configuration could not be loaded in child process")));
}
#endif
diff --git a/src/include/libpq/libpq-be.h b/src/include/libpq/libpq-be.h
index b7582d6126..79d38cedd7 100644
--- a/src/include/libpq/libpq-be.h
+++ b/src/include/libpq/libpq-be.h
@@ -199,7 +199,7 @@ typedef struct Port
* These functions are implemented by the glue code specific to each
* SSL implementation (e.g. be-secure-openssl.c)
*/
-extern int be_tls_init(bool failOnError);
+extern int be_tls_init(bool isServerStart);
extern void be_tls_destroy(void);
extern int be_tls_open_server(Port *port);
extern void be_tls_close(Port *port);
diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h
index 46c5b726ba..538066e106 100644
--- a/src/include/libpq/libpq.h
+++ b/src/include/libpq/libpq.h
@@ -81,7 +81,7 @@ extern char *ssl_key_file;
extern char *ssl_ca_file;
extern char *ssl_crl_file;
-extern int secure_initialize(bool failOnError);
+extern int secure_initialize(bool isServerStart);
extern bool secure_loaded_verify_locations(void);
extern void secure_destroy(void);
extern int secure_open_server(Port *port);
--
cgit v1.2.3
From 7c030783a5bd07cadffc2a1018bc33119a4c7505 Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Wed, 4 Jan 2017 19:02:07 +0000
Subject: Add pg_recvlogical --endpos=LSN
Allow pg_recvlogical to specify an ending LSN, complementing
the existing --startpos=LSN option.
Craig Ringer, reviewed by Euler Taveira and Naoki Okano
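A hypothetical session (slot name and LSN are placeholders): replication stops and pg_recvlogical exits with status 0 once the end position is reached.

pg_recvlogical -d postgres --slot=demo_slot --create-slot
pg_recvlogical -d postgres --slot=demo_slot --start --endpos=0/1A2B3C8 -f -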
---
doc/src/sgml/ref/pg_recvlogical.sgml | 34 ++++++++
src/bin/pg_basebackup/pg_recvlogical.c | 145 +++++++++++++++++++++++++++++----
2 files changed, 164 insertions(+), 15 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_recvlogical.sgml b/doc/src/sgml/ref/pg_recvlogical.sgml
index b35881f2b9..d066ce8701 100644
--- a/doc/src/sgml/ref/pg_recvlogical.sgml
+++ b/doc/src/sgml/ref/pg_recvlogical.sgml
@@ -38,6 +38,14 @@ PostgreSQL documentation
constraints as , plus those for logical
replication (see ).
+
+
+ pg_recvlogical> has no equivalent to the logical decoding
+ SQL interface's peek and get modes. It sends replay confirmations for
+ data lazily as it receives it and on clean exit. To examine pending data on
+ a slot without consuming it, use
+ pg_logical_slot_peek_changes>>.
+
@@ -154,6 +162,32 @@ PostgreSQL documentation
+
+
-E lsn
+
--endpos=lsn
+
+
+ In --start mode, automatically stop replication
+ and exit with normal exit status 0 when receiving reaches the
+ specified LSN. If specified when not in --start
+ mode, an error is raised.
+
+
+
+ If there's a record with LSN exactly equal to lsn>,
+ the record will be output.
+
+
+
+ The --endpos option is not aware of transaction
+ boundaries and may truncate output partway through a transaction.
+ Any partially output transaction will not be consumed and will be
+ replayed again when the slot is next read from. Individual messages
+ are never truncated.
+
+
+
+
- Directory in which to find log segment files. The default is to search
- for them in the pg_wal subdirectory of the current
- directory.
+ Specifies a directory to search for log segment files or a
+ directory with a pg_wal subdirectory that
+ contains such files. The default is to search in the current
+ directory, the pg_wal subdirectory of the
+ current directory, and the pg_wal subdirectory
+ of PGDATA.
diff --git a/src/bin/pg_xlogdump/pg_xlogdump.c b/src/bin/pg_xlogdump/pg_xlogdump.c
index 0ad441e22e..590d2ad587 100644
--- a/src/bin/pg_xlogdump/pg_xlogdump.c
+++ b/src/bin/pg_xlogdump/pg_xlogdump.c
@@ -679,8 +679,9 @@ usage(void)
printf(_(" -e, --end=RECPTR stop reading at log position RECPTR\n"));
printf(_(" -f, --follow keep retrying after reaching end of WAL\n"));
printf(_(" -n, --limit=N number of records to display\n"));
- printf(_(" -p, --path=PATH directory in which to find log segment files\n"
- " (default: ./pg_wal)\n"));
+ printf(_(" -p, --path=PATH directory in which to find log segment files or a\n"
+ " directory with a ./pg_wal that contains such files\n"
+ " (default: current directory, ./pg_wal, PGDATA/pg_wal)\n"));
printf(_(" -r, --rmgr=RMGR only show records generated by resource manager RMGR\n"
" use --rmgr=list to list valid resource manager names\n"));
printf(_(" -s, --start=RECPTR start reading at log position RECPTR\n"));
--
cgit v1.2.3
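A quick usage sketch of the new switch (the slot name and LSN are illustrative; the slot must already exist and wal_level must be logical):

    # Stream decoded changes to stdout and stop once replay reaches the LSN:
    pg_recvlogical -d postgres --slot=test_slot --start \
        --endpos=0/3000000 -f -
    # --endpos without --start (e.g. together with --create-slot) is an error.

The pg_xlogdump hunk above broadens -p/--path along the same lines: for example, pg_xlogdump -p "$PGDATA" now finds segments in $PGDATA/pg_wal automatically.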
From e574f15d6295b12c03ef8810c00976b65933711a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 13 Jan 2017 12:00:00 -0500
Subject: Updates to reflect that pg_ctl stop -m fast is the default
Various example and test code used -m fast explicitly, but since it's
the default, this can be omitted now or should be replaced by a better
example.
pg_upgrade is not touched, so it can continue to operate with older
installations.
---
contrib/start-scripts/freebsd | 4 ++--
contrib/start-scripts/linux | 4 ++--
contrib/start-scripts/osx/PostgreSQL | 4 ++--
doc/src/sgml/ref/pg_ctl-ref.sgml | 2 +-
src/bin/pg_ctl/t/001_start_stop.pl | 10 +++++-----
src/bin/pg_ctl/t/002_status.pl | 2 +-
src/test/regress/pg_regress.c | 2 +-
src/tools/msvc/vcregress.pl | 4 ++--
8 files changed, 16 insertions(+), 16 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/start-scripts/freebsd b/contrib/start-scripts/freebsd
index 758574b427..87d9b9b357 100644
--- a/contrib/start-scripts/freebsd
+++ b/contrib/start-scripts/freebsd
@@ -48,10 +48,10 @@ case $1 in
echo -n ' postgresql'
;;
stop)
- su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast"
+ su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s"
;;
restart)
- su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w"
+ su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -w"
su -l $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
;;
status)
diff --git a/contrib/start-scripts/linux b/contrib/start-scripts/linux
index c88433a0e2..d7499c9d1e 100644
--- a/contrib/start-scripts/linux
+++ b/contrib/start-scripts/linux
@@ -97,12 +97,12 @@ case $1 in
;;
stop)
echo -n "Stopping PostgreSQL: "
- su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast"
+ su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s"
echo "ok"
;;
restart)
echo -n "Restarting PostgreSQL: "
- su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w"
+ su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -w"
test -e "$PG_OOM_ADJUST_FILE" && echo "$PG_MASTER_OOM_SCORE_ADJ" > "$PG_OOM_ADJUST_FILE"
su - $PGUSER -c "$DAEMON_ENV $DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
echo "ok"
diff --git a/contrib/start-scripts/osx/PostgreSQL b/contrib/start-scripts/osx/PostgreSQL
index d38504dcc2..48c098c8f1 100755
--- a/contrib/start-scripts/osx/PostgreSQL
+++ b/contrib/start-scripts/osx/PostgreSQL
@@ -90,14 +90,14 @@ StartService () {
StopService () {
ConsoleMessage "Stopping PostgreSQL database server"
- sudo -u $PGUSER sh -c "$PGCTL stop -D '${PGDATA}' -s -m fast"
+ sudo -u $PGUSER sh -c "$PGCTL stop -D '${PGDATA}' -s"
}
RestartService () {
if [ "${POSTGRESQL:=-NO-}" = "-YES-" ]; then
ConsoleMessage "Restarting PostgreSQL database server"
# should match StopService:
- sudo -u $PGUSER sh -c "$PGCTL stop -D '${PGDATA}' -s -m fast"
+ sudo -u $PGUSER sh -c "$PGCTL stop -D '${PGDATA}' -s"
# should match StartService:
if [ "${ROTATELOGS}" = "1" ]; then
sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' &" 2>&1 | ${LOGUTIL} "${PGLOG}" ${ROTATESEC} &
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index 5fb6898699..3f168005ec 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -615,7 +615,7 @@ PostgreSQL documentation
The -m option allows control over
how the server shuts down:
-$ pg_ctl stop -m fast
+$ pg_ctl stop -m smart
diff --git a/src/bin/pg_ctl/t/001_start_stop.pl b/src/bin/pg_ctl/t/001_start_stop.pl
index cbe99d79ad..b328f22487 100644
--- a/src/bin/pg_ctl/t/001_start_stop.pl
+++ b/src/bin/pg_ctl/t/001_start_stop.pl
@@ -42,14 +42,14 @@ command_ok([ 'pg_ctl', 'start', '-D', "$tempdir/data", '-w' ],
sleep 3 if ($windows_os);
command_fails([ 'pg_ctl', 'start', '-D', "$tempdir/data", '-w' ],
'second pg_ctl start -w fails');
-command_ok([ 'pg_ctl', 'stop', '-D', "$tempdir/data", '-w', '-m', 'fast' ],
+command_ok([ 'pg_ctl', 'stop', '-D', "$tempdir/data", '-w' ],
'pg_ctl stop -w');
-command_fails([ 'pg_ctl', 'stop', '-D', "$tempdir/data", '-w', '-m', 'fast' ],
+command_fails([ 'pg_ctl', 'stop', '-D', "$tempdir/data", '-w' ],
'second pg_ctl stop fails');
-command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data", '-w', '-m', 'fast' ],
+command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data", '-w' ],
'pg_ctl restart with server not running');
-command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data", '-w', '-m', 'fast' ],
+command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data", '-w' ],
'pg_ctl restart with server running');
-system_or_bail 'pg_ctl', 'stop', '-D', "$tempdir/data", '-m', 'fast';
+system_or_bail 'pg_ctl', 'stop', '-D', "$tempdir/data";
diff --git a/src/bin/pg_ctl/t/002_status.pl b/src/bin/pg_ctl/t/002_status.pl
index 98e171e394..606d10560f 100644
--- a/src/bin/pg_ctl/t/002_status.pl
+++ b/src/bin/pg_ctl/t/002_status.pl
@@ -22,4 +22,4 @@ system_or_bail 'pg_ctl', '-l', "$tempdir/logfile", '-D',
command_exit_is([ 'pg_ctl', 'status', '-D', $node->data_dir ],
0, 'pg_ctl status with server running');
-system_or_bail 'pg_ctl', 'stop', '-D', $node->data_dir, '-m', 'fast';
+system_or_bail 'pg_ctl', 'stop', '-D', $node->data_dir;
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index baebcc8c32..d4d00d9c66 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -265,7 +265,7 @@ stop_postmaster(void)
fflush(stderr);
snprintf(buf, sizeof(buf),
- "\"%s%spg_ctl\" stop -D \"%s/data\" -s -m fast",
+ "\"%s%spg_ctl\" stop -D \"%s/data\" -s",
bindir ? bindir : "",
bindir ? "/" : "",
temp_instance);
diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl
index bcf22677ac..f575e40052 100644
--- a/src/tools/msvc/vcregress.pl
+++ b/src/tools/msvc/vcregress.pl
@@ -465,7 +465,7 @@ sub upgradecheck
@args = ('pg_dumpall', '-f', "$tmp_root/dump1.sql");
system(@args) == 0 or exit 1;
print "\nStopping old cluster\n\n";
- system("pg_ctl -m fast stop") == 0 or exit 1;
+ system("pg_ctl stop") == 0 or exit 1;
$ENV{PGDATA} = "$data";
print "\nSetting up new cluster\n\n";
standard_initdb() or exit 1;
@@ -483,7 +483,7 @@ sub upgradecheck
@args = ('pg_dumpall', '-f', "$tmp_root/dump2.sql");
system(@args) == 0 or exit 1;
print "\nStopping new cluster\n\n";
- system("pg_ctl -m fast stop") == 0 or exit 1;
+ system("pg_ctl stop") == 0 or exit 1;
print "\nDeleting old cluster\n\n";
system(".\\delete_old_cluster.bat") == 0 or exit 1;
print "\nComparing old and new cluster dumps\n\n";
--
cgit v1.2.3
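With fast now the default, -m is only needed to choose a different shutdown mode. A sketch of the three modes after this change:

    # Equivalent since fast became the default:
    pg_ctl stop -D "$PGDATA"
    pg_ctl stop -D "$PGDATA" -m fast
    # Wait for existing sessions to disconnect first:
    pg_ctl stop -D "$PGDATA" -m smart
    # Quit without a shutdown checkpoint (recovery runs on next start):
    pg_ctl stop -D "$PGDATA" -m immediate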
From 05cd12ed5bc74c853a161c5a138a0cb6f9b0cb8a Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Fri, 13 Jan 2017 12:00:00 -0500
Subject: pg_ctl: Change default to wait for all actions
The different actions in pg_ctl had different defaults for -w and -W,
mostly for historical reasons. Most users will want the -w behavior, so
make that the default.
Remove the -w option in most example and test code, to avoid confusion
and reduce verbosity. pg_upgrade is not touched, so it can continue to
work with older installations.
Reviewed-by: Beena Emerson
Reviewed-by: Ryan Murphy
---
contrib/start-scripts/freebsd | 5 ++--
contrib/start-scripts/linux | 5 ++--
contrib/start-scripts/osx/PostgreSQL | 3 +--
doc/src/sgml/ref/pg_ctl-ref.sgml | 36 ++++++++++++++-------------
src/bin/pg_ctl/pg_ctl.c | 15 ++---------
src/bin/pg_ctl/t/001_start_stop.pl | 18 +++++++-------
src/bin/pg_ctl/t/003_promote.pl | 10 ++++----
src/test/modules/commit_ts/t/003_standby_2.pl | 2 +-
src/test/perl/PostgresNode.pm | 12 ++++-----
src/tools/msvc/vcregress.pl | 4 +--
10 files changed, 49 insertions(+), 61 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/start-scripts/freebsd b/contrib/start-scripts/freebsd
index 87d9b9b357..c6ac8cd47a 100644
--- a/contrib/start-scripts/freebsd
+++ b/contrib/start-scripts/freebsd
@@ -28,8 +28,7 @@ PGLOG="$PGDATA/serverlog"
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
# What to use to start up the postmaster. (If you want the script to wait
-# until the server has started, you could use "pg_ctl start -w" here.
-# But without -w, pg_ctl adds no value.)
+# until the server has started, you could use "pg_ctl start" here.)
DAEMON="$prefix/bin/postmaster"
# What to use to shut down the postmaster
@@ -51,7 +50,7 @@ case $1 in
su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s"
;;
restart)
- su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -w"
+ su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s"
su -l $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
;;
status)
diff --git a/contrib/start-scripts/linux b/contrib/start-scripts/linux
index d7499c9d1e..44a775b030 100644
--- a/contrib/start-scripts/linux
+++ b/contrib/start-scripts/linux
@@ -60,8 +60,7 @@ PGLOG="$PGDATA/serverlog"
PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
# What to use to start up the postmaster. (If you want the script to wait
-# until the server has started, you could use "pg_ctl start -w" here.
-# But without -w, pg_ctl adds no value.)
+# until the server has started, you could use "pg_ctl start" here.)
DAEMON="$prefix/bin/postmaster"
# What to use to shut down the postmaster
@@ -102,7 +101,7 @@ case $1 in
;;
restart)
echo -n "Restarting PostgreSQL: "
- su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -w"
+ su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s"
test -e "$PG_OOM_ADJUST_FILE" && echo "$PG_MASTER_OOM_SCORE_ADJ" > "$PG_OOM_ADJUST_FILE"
su - $PGUSER -c "$DAEMON_ENV $DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1
echo "ok"
diff --git a/contrib/start-scripts/osx/PostgreSQL b/contrib/start-scripts/osx/PostgreSQL
index 48c098c8f1..9735c8c57f 100755
--- a/contrib/start-scripts/osx/PostgreSQL
+++ b/contrib/start-scripts/osx/PostgreSQL
@@ -65,8 +65,7 @@ ROTATESEC="604800"
PATH="$prefix/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin"
# What to use to start up the postmaster. (If you want the script to wait
-# until the server has started, you could use "pg_ctl start -w" here.
-# But without -w, pg_ctl adds no value.)
+# until the server has started, you could use "pg_ctl start" here.)
DAEMON="$prefix/bin/postmaster"
# What to use to shut down the postmaster
diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index 3f168005ec..176dfaf98a 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -31,7 +31,7 @@ PostgreSQL documentation
pg_ctl
options
@@ -391,17 +391,7 @@ PostgreSQL documentation
Wait for an operation to complete. This is supported for the
modes start, stop,
restart, promote,
- and register.
-
-
-
- Waiting is the default option for shutdowns, but not startups,
- restarts, or promotions. This is mainly for historical reasons; the
- waiting option is almost always preferable. If waiting is not
- selected, the requested action is triggered, but there is no feedback
- about its success. In that case, the server log file or an external
- monitoring system would have to be used to check the progress and
- success of the operation.
+ and register, and is the default for those modes.
@@ -424,6 +414,18 @@ PostgreSQL documentation
Do not wait for an operation to complete. This is the opposite of the
option -w.
+
+
+ If waiting is disabled, the requested action is triggered, but there
+ is no feedback about its success. In that case, the server log file
+ or an external monitoring system would have to be used to check the
+ progress and success of the operation.
+
+
+
+ In prior releases of PostgreSQL, this was the default except for
+ the stop mode.
+
@@ -593,7 +595,7 @@ PostgreSQL documentation
To start the server, waiting until the server is
accepting connections:
-$ pg_ctl -w start
+$ pg_ctl start
@@ -637,7 +639,7 @@ PostgreSQL documentation
To restart the server,
waiting for it to shut down and restart:
-$ pg_ctl -w restart
+$ pg_ctl restart
diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c
index c161b19935..93e5891950 100644
--- a/src/bin/pg_ctl/pg_ctl.c
+++ b/src/bin/pg_ctl/pg_ctl.c
@@ -71,8 +71,7 @@ typedef enum
#define DEFAULT_WAIT 60
-static bool do_wait = false;
-static bool wait_set = false;
+static bool do_wait = true;
static int wait_seconds = DEFAULT_WAIT;
static bool wait_seconds_arg = false;
static bool silent_mode = false;
@@ -1959,7 +1958,7 @@ do_help(void)
printf(_(" -s, --silent only print errors, no informational messages\n"));
printf(_(" -t, --timeout=SECS seconds to wait when using -w option\n"));
printf(_(" -V, --version output version information, then exit\n"));
- printf(_(" -w, --wait wait until operation completes\n"));
+ printf(_(" -w, --wait wait until operation completes (default)\n"));
printf(_(" -W, --no-wait do not wait until operation completes\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("(The default is to wait for shutdown, but not for start or restart.)\n\n"));
@@ -2323,11 +2322,9 @@ main(int argc, char **argv)
break;
case 'w':
do_wait = true;
- wait_set = true;
break;
case 'W':
do_wait = false;
- wait_set = true;
break;
case 'c':
allow_core_files = true;
@@ -2423,14 +2420,6 @@ main(int argc, char **argv)
exit(1);
}
- if (!wait_set)
- {
- if (ctl_command == STOP_COMMAND)
- do_wait = true;
- else
- do_wait = false;
- }
-
if (ctl_command == RELOAD_COMMAND)
{
sig = SIGHUP;
diff --git a/src/bin/pg_ctl/t/001_start_stop.pl b/src/bin/pg_ctl/t/001_start_stop.pl
index b328f22487..8f16bf9795 100644
--- a/src/bin/pg_ctl/t/001_start_stop.pl
+++ b/src/bin/pg_ctl/t/001_start_stop.pl
@@ -32,24 +32,24 @@ else
print CONF "listen_addresses = '127.0.0.1'\n";
}
close CONF;
-command_ok([ 'pg_ctl', 'start', '-D', "$tempdir/data", '-w' ],
- 'pg_ctl start -w');
+command_ok([ 'pg_ctl', 'start', '-D', "$tempdir/data" ],
+ 'pg_ctl start');
# sleep here is because Windows builds can't check postmaster.pid exactly,
# so they may mistake a pre-existing postmaster.pid for one created by the
# postmaster they start. Waiting more than the 2 seconds slop time allowed
# by test_postmaster_connection prevents that mistake.
sleep 3 if ($windows_os);
-command_fails([ 'pg_ctl', 'start', '-D', "$tempdir/data", '-w' ],
- 'second pg_ctl start -w fails');
-command_ok([ 'pg_ctl', 'stop', '-D', "$tempdir/data", '-w' ],
- 'pg_ctl stop -w');
-command_fails([ 'pg_ctl', 'stop', '-D', "$tempdir/data", '-w' ],
+command_fails([ 'pg_ctl', 'start', '-D', "$tempdir/data" ],
+ 'second pg_ctl start fails');
+command_ok([ 'pg_ctl', 'stop', '-D', "$tempdir/data" ],
+ 'pg_ctl stop');
+command_fails([ 'pg_ctl', 'stop', '-D', "$tempdir/data" ],
'second pg_ctl stop fails');
-command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data", '-w' ],
+command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data" ],
'pg_ctl restart with server not running');
-command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data", '-w' ],
+command_ok([ 'pg_ctl', 'restart', '-D', "$tempdir/data" ],
'pg_ctl restart with server running');
system_or_bail 'pg_ctl', 'stop', '-D', "$tempdir/data";
diff --git a/src/bin/pg_ctl/t/003_promote.pl b/src/bin/pg_ctl/t/003_promote.pl
index 0b6090b6eb..7b1df29fa3 100644
--- a/src/bin/pg_ctl/t/003_promote.pl
+++ b/src/bin/pg_ctl/t/003_promote.pl
@@ -32,13 +32,13 @@ $node_standby->start;
is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'),
't', 'standby is in recovery');
-command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, 'promote' ],
- 'pg_ctl promote of standby runs');
+command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, '-W', 'promote' ],
+ 'pg_ctl -W promote of standby runs');
ok($node_standby->poll_query_until('postgres', 'SELECT NOT pg_is_in_recovery()'),
'promoted standby is not in recovery');
-# same again with wait option
+# same again with default wait option
$node_standby = get_new_node('standby2');
$node_standby->init_from_backup($node_primary, 'my_backup', has_streaming => 1);
$node_standby->start;
@@ -46,8 +46,8 @@ $node_standby->start;
is($node_standby->safe_psql('postgres', 'SELECT pg_is_in_recovery()'),
't', 'standby is in recovery');
-command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, '-w', 'promote' ],
- 'pg_ctl -w promote of standby runs');
+command_ok([ 'pg_ctl', '-D', $node_standby->data_dir, 'promote' ],
+ 'pg_ctl promote of standby runs');
# no wait here
diff --git a/src/test/modules/commit_ts/t/003_standby_2.pl b/src/test/modules/commit_ts/t/003_standby_2.pl
index d37ff182c4..043ccb14a5 100644
--- a/src/test/modules/commit_ts/t/003_standby_2.pl
+++ b/src/test/modules/commit_ts/t/003_standby_2.pl
@@ -55,7 +55,7 @@ $master->restart;
$master->append_conf('postgresql.conf', 'track_commit_timestamp = off');
$master->restart;
-system_or_bail('pg_ctl', '-w', '-D', $standby->data_dir, 'promote');
+system_or_bail('pg_ctl', '-D', $standby->data_dir, 'promote');
$standby->poll_query_until('postgres', "SELECT pg_is_in_recovery() <> true");
$standby->safe_psql('postgres', "create table t11()");
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 868492b615..d8be1bd3e1 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -632,7 +632,7 @@ port = $port
=item $node->start()
-Wrapper for pg_ctl -w start
+Wrapper for pg_ctl start
Start the node and wait until it is ready to accept connections.
@@ -645,7 +645,7 @@ sub start
my $pgdata = $self->data_dir;
my $name = $self->name;
print("### Starting node \"$name\"\n");
- my $ret = TestLib::system_log('pg_ctl', '-w', '-D', $self->data_dir, '-l',
+ my $ret = TestLib::system_log('pg_ctl', '-D', $self->data_dir, '-l',
$self->logfile, 'start');
if ($ret != 0)
@@ -702,7 +702,7 @@ sub reload
=item $node->restart()
-Wrapper for pg_ctl -w restart
+Wrapper for pg_ctl restart
=cut
@@ -714,7 +714,7 @@ sub restart
my $logfile = $self->logfile;
my $name = $self->name;
print "### Restarting node \"$name\"\n";
- TestLib::system_log('pg_ctl', '-D', $pgdata, '-w', '-l', $logfile,
+ TestLib::system_log('pg_ctl', '-D', $pgdata, '-l', $logfile,
'restart');
$self->_update_pid;
}
@@ -723,7 +723,7 @@ sub restart
=item $node->promote()
-Wrapper for pg_ctl promote -w
+Wrapper for pg_ctl promote
=cut
@@ -735,7 +735,7 @@ sub promote
my $logfile = $self->logfile;
my $name = $self->name;
print "### Promoting node \"$name\"\n";
- TestLib::system_log('pg_ctl', '-D', $pgdata, '-w', '-l', $logfile,
+ TestLib::system_log('pg_ctl', '-D', $pgdata, '-l', $logfile,
'promote');
}
diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl
index f575e40052..f1b9819cd2 100644
--- a/src/tools/msvc/vcregress.pl
+++ b/src/tools/msvc/vcregress.pl
@@ -448,7 +448,7 @@ sub upgradecheck
print "\nRunning initdb on old cluster\n\n";
standard_initdb() or exit 1;
print "\nStarting old cluster\n\n";
- my @args = ('pg_ctl', 'start', '-l', "$logdir/postmaster1.log", '-w');
+ my @args = ('pg_ctl', 'start', '-l', "$logdir/postmaster1.log");
system(@args) == 0 or exit 1;
print "\nCreating databases with names covering most ASCII bytes\n\n";
@@ -475,7 +475,7 @@ sub upgradecheck
$bindir, '-B', $bindir);
system(@args) == 0 or exit 1;
print "\nStarting new cluster\n\n";
- @args = ('pg_ctl', '-l', "$logdir/postmaster2.log", '-w', 'start');
+ @args = ('pg_ctl', '-l', "$logdir/postmaster2.log", 'start');
system(@args) == 0 or exit 1;
print "\nSetting up stats on new cluster\n\n";
system(".\\analyze_new_cluster.bat") == 0 or exit 1;
--
cgit v1.2.3
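After this change -w is simply the default, and scripts that relied on fire-and-forget behavior need an explicit -W. A sketch:

    # Now waits until the server is accepting connections:
    pg_ctl start -D "$PGDATA" -l logfile
    # Restore the old non-waiting behavior explicitly:
    pg_ctl start -D "$PGDATA" -l logfile -W
    # -t/--timeout still bounds how long the default wait lasts:
    pg_ctl restart -D "$PGDATA" -t 120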
From f6d6d2920d2cddc4b381cbbf2915db9d985d20a9 Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Sat, 14 Jan 2017 17:14:56 +0100
Subject: Change default values for backup and replication parameters
This changes the default values of the following parameters:
wal_level = replica
max_wal_senders = 10
max_replication_slots = 10
in order to make it possible to make a backup and set up simple
replication on the default settings, without requiring a system restart.
Discussion: https://postgr.es/m/CABUevEy4PR_EAvZEzsbF5s+V0eEvw7shJ2t-AUwbHOjT+yRb3A@mail.gmail.com
Reviewed by Peter Eisentraut. Benchmark help from Tomas Vondra.
---
doc/src/sgml/backup.sgml | 3 ++-
doc/src/sgml/config.sgml | 16 ++++++++--------
src/backend/utils/misc/guc.c | 6 +++---
src/backend/utils/misc/postgresql.conf.sample | 6 +++---
src/test/modules/commit_ts/t/002_standby.pl | 1 -
src/test/modules/commit_ts/t/003_standby_2.pl | 1 -
src/test/perl/PostgresNode.pm | 6 +++++-
src/test/recovery/t/006_logical_decoding.pl | 1 -
src/test/recovery/t/008_fsm_truncation.pl | 1 -
9 files changed, 21 insertions(+), 20 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml
index 6eaed1efbe..d7df91090d 100644
--- a/doc/src/sgml/backup.sgml
+++ b/doc/src/sgml/backup.sgml
@@ -1420,7 +1420,8 @@ restore_command = 'cp /mnt/server/archivedir/%f %p'
If more flexibility in copying the backup files is needed, a lower
level process can be used for standalone hot backups as well.
- To prepare for low level standalone hot backups, set wal_level> to
+ To prepare for low level standalone hot backups, make sure
+ wal_level> is set to
replica> or higher, archive_mode> to
on>, and set up an archive_command> that performs
archiving only when a switch file> exists. For example:
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 30dd54cd5d..07afa3c77a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2183,12 +2183,12 @@ include_dir 'conf.d'
- wal_level> determines how much information is written
- to the WAL. The default value is minimal>, which writes
- only the information needed to recover from a crash or immediate
- shutdown. replica> adds logging required for WAL
- archiving as well as information required to run
- read-only queries on a standby server. Finally,
+ wal_level> determines how much information is written to
+ the WAL. The default value is replica>, which writes enough
+ data to support WAL archiving and replication, including running
+ read-only queries on a standby server. minimal> removes all
+ logging except the information required to recover from a crash or
+ immediate shutdown. Finally,
logical> adds information necessary to support logical
decoding. Each level includes the information logged at all lower
levels. This parameter can only be set at server start.
@@ -2926,7 +2926,7 @@ include_dir 'conf.d'
Specifies the maximum number of concurrent connections from
standby servers or streaming base backup clients (i.e., the
maximum number of simultaneously running WAL sender
- processes). The default is zero, meaning replication is
+ processes). The default is 10. The value 0 means replication is
disabled. WAL sender processes count towards the total number
of connections, so the parameter cannot be set higher than
. Abrupt streaming client
@@ -2951,7 +2951,7 @@ include_dir 'conf.d'
Specifies the maximum number of replication slots
(see ) that the server
- can support. The default is zero. This parameter can only be set at
+ can support. The default is 10. This parameter can only be set at
server start.
wal_level must be set
to replica or higher to allow replication slots to
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 5b23dbf4a0..4e2bd4c496 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2315,7 +2315,7 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&max_wal_senders,
- 0, 0, MAX_BACKENDS,
+ 10, 0, MAX_BACKENDS,
NULL, NULL, NULL
},
@@ -2326,7 +2326,7 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&max_replication_slots,
- 0, 0, MAX_BACKENDS /* XXX? */ ,
+ 10, 0, MAX_BACKENDS /* XXX? */ ,
NULL, NULL, NULL
},
@@ -3749,7 +3749,7 @@ static struct config_enum ConfigureNamesEnum[] =
NULL
},
&wal_level,
- WAL_LEVEL_MINIMAL, wal_level_options,
+ WAL_LEVEL_REPLICA, wal_level_options,
NULL, NULL, NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b3f29610d0..15669b83c7 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -174,7 +174,7 @@
# - Settings -
-#wal_level = minimal # minimal, replica, or logical
+#wal_level = replica # minimal, replica, or logical
# (change requires restart)
#fsync = on # flush data to disk for crash safety
# (turning this off can cause
@@ -229,12 +229,12 @@
# Set these on the master and on any standby that will send replication data.
-#max_wal_senders = 0 # max number of walsender processes
+#max_wal_senders = 10 # max number of walsender processes
# (change requires restart)
#wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables
#wal_sender_timeout = 60s # in milliseconds; 0 disables
-#max_replication_slots = 0 # max number of replication slots
+#max_replication_slots = 10 # max number of replication slots
# (change requires restart)
#track_commit_timestamp = off # collect timestamp of transaction commit
# (change requires restart)
diff --git a/src/test/modules/commit_ts/t/002_standby.pl b/src/test/modules/commit_ts/t/002_standby.pl
index 4dbde2978e..ff60044540 100644
--- a/src/test/modules/commit_ts/t/002_standby.pl
+++ b/src/test/modules/commit_ts/t/002_standby.pl
@@ -15,7 +15,6 @@ $master->append_conf(
'postgresql.conf', qq{
track_commit_timestamp = on
max_wal_senders = 5
- wal_level = hot_standby
});
$master->start;
$master->backup($bkplabel);
diff --git a/src/test/modules/commit_ts/t/003_standby_2.pl b/src/test/modules/commit_ts/t/003_standby_2.pl
index 043ccb14a5..1775b22dad 100644
--- a/src/test/modules/commit_ts/t/003_standby_2.pl
+++ b/src/test/modules/commit_ts/t/003_standby_2.pl
@@ -14,7 +14,6 @@ $master->append_conf(
'postgresql.conf', qq{
track_commit_timestamp = on
max_wal_senders = 5
- wal_level = hot_standby
});
$master->start;
$master->backup($bkplabel);
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index d8be1bd3e1..932478183a 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -415,7 +415,6 @@ sub init
if ($params{allows_streaming})
{
- print $conf "wal_level = replica\n";
print $conf "max_wal_senders = 5\n";
print $conf "wal_keep_segments = 20\n";
print $conf "max_wal_size = 128MB\n";
@@ -424,6 +423,11 @@ sub init
print $conf "hot_standby = on\n";
print $conf "max_connections = 10\n";
}
+ else
+ {
+ print $conf "wal_level = minimal\n";
+ print $conf "max_wal_senders = 0\n";
+ }
if ($TestLib::windows_os)
{
diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl
index b80a9a9415..1716360a17 100644
--- a/src/test/recovery/t/006_logical_decoding.pl
+++ b/src/test/recovery/t/006_logical_decoding.pl
@@ -10,7 +10,6 @@ my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->append_conf(
'postgresql.conf', qq(
-max_replication_slots = 4
wal_level = logical
));
$node_master->start;
diff --git a/src/test/recovery/t/008_fsm_truncation.pl b/src/test/recovery/t/008_fsm_truncation.pl
index 9f6bdb0b64..5220611e44 100644
--- a/src/test/recovery/t/008_fsm_truncation.pl
+++ b/src/test/recovery/t/008_fsm_truncation.pl
@@ -14,7 +14,6 @@ $node_master->init(allows_streaming => 1);
$node_master->append_conf('postgresql.conf', qq{
fsync = on
-wal_level = replica
wal_log_hints = on
max_prepared_transactions = 5
autovacuum = off
--
cgit v1.2.3
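A quick way to confirm the new defaults on a freshly initialized cluster, and the simple replication setup they enable without a restart (paths are illustrative):

    psql -c "SHOW wal_level;"              # replica
    psql -c "SHOW max_wal_senders;"        # 10
    psql -c "SHOW max_replication_slots;"  # 10
    # A base backup with streamed WAL now works out of the box:
    pg_basebackup -D /tmp/standby -X stream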
From e7b020f786bf3b344f81d70aa423525fd4f44dfa Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Mon, 16 Jan 2017 13:56:43 +0100
Subject: Make pg_basebackup use temporary replication slots
Temporary replication slots will be used by default when wal streaming
is used and no slot name is specified with -S. If a slot name is
specified, then a permanent slot with that name is used. If --no-slot is
specified, then no permanent or temporary slot will be used.
Temporary slots are only used on 10.0 and newer, of course.
---
doc/src/sgml/ref/pg_basebackup.sgml | 25 ++++++++++++++++
src/bin/pg_basebackup/pg_basebackup.c | 45 +++++++++++++++++++++++++++-
src/bin/pg_basebackup/pg_receivexlog.c | 3 ++
src/bin/pg_basebackup/pg_recvlogical.c | 1 +
src/bin/pg_basebackup/receivelog.c | 22 ++++++++++++--
src/bin/pg_basebackup/receivelog.h | 6 ++--
src/bin/pg_basebackup/streamutil.c | 1 -
src/bin/pg_basebackup/streamutil.h | 1 -
src/bin/pg_basebackup/t/010_pg_basebackup.pl | 5 +++-
9 files changed, 101 insertions(+), 8 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index ba7d16abf0..5c2db2581c 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -240,6 +240,31 @@ PostgreSQL documentation
the server does not remove any necessary WAL data in the time between
the end of the base backup and the start of streaming replication.
+
+ If this option is not specified and the server supports temporary
+ replication slots (version 10 and later), then a temporary replication
+ slot is automatically used for WAL streaming.
+
+
+
+
+
+
+ --no-slot
+
+
+ This option prevents the creation of a temporary replication slot
+ during the backup even if it's supported by the server.
+
+
+ Temporary replication slots are created by default if no slot name
+ is given with the option -S when using log streaming.
+
+
+ The main purpose of this option is to allow taking a base backup when
+ the server is out of free replication slots. Using replication slots
+ is almost always preferred, because it prevents needed WAL from being
+ removed by the server during the backup.
+
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 8ebf24e771..e7fb527d3a 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -61,6 +61,11 @@ typedef struct TablespaceList
*/
#define MINIMUM_VERSION_FOR_PG_WAL 100000
+/*
+ * Temporary replication slots are supported from version 10.
+ */
+#define MINIMUM_VERSION_FOR_TEMP_SLOTS 100000
+
/*
* Different ways to include WAL
*/
@@ -88,6 +93,8 @@ static bool do_sync = true;
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
static pg_time_t last_progress_report = 0;
static int32 maxrate = 0; /* no limit by default */
+static char *replication_slot = NULL;
+static bool temp_replication_slot = true;
static bool success = false;
static bool made_new_pgdata = false;
@@ -332,6 +339,7 @@ usage(void)
printf(_(" -R, --write-recovery-conf\n"
" write recovery.conf after backup\n"));
printf(_(" -S, --slot=SLOTNAME replication slot to use\n"));
+ printf(_(" --no-slot prevent creation of temporary replication slot\n"));
printf(_(" -T, --tablespace-mapping=OLDDIR=NEWDIR\n"
" relocate tablespace in OLDDIR to NEWDIR\n"));
printf(_(" -X, --xlog-method=none|fetch|stream\n"
@@ -460,6 +468,7 @@ typedef struct
char xlog[MAXPGPATH]; /* directory or tarfile depending on mode */
char *sysidentifier;
int timeline;
+ bool temp_slot;
} logstreamer_param;
static int
@@ -479,6 +488,10 @@ LogStreamerMain(logstreamer_param *param)
stream.do_sync = do_sync;
stream.mark_done = true;
stream.partial_suffix = NULL;
+ stream.replication_slot = replication_slot;
+ stream.temp_slot = param->temp_slot;
+ if (stream.temp_slot && !stream.replication_slot)
+ stream.replication_slot = psprintf("pg_basebackup_%d", (int) getpid());
if (format == 'p')
stream.walmethod = CreateWalDirectoryMethod(param->xlog, do_sync);
@@ -565,6 +578,11 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
PQserverVersion(conn) < MINIMUM_VERSION_FOR_PG_WAL ?
"pg_xlog" : "pg_wal");
+ /* Temporary replication slots are only supported in 10 and newer */
+ if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_TEMP_SLOTS)
+ param->temp_slot = false;
+ else
+ param->temp_slot = temp_replication_slot;
if (format == 'p')
{
@@ -2063,11 +2081,13 @@ main(int argc, char **argv)
{"verbose", no_argument, NULL, 'v'},
{"progress", no_argument, NULL, 'P'},
{"xlogdir", required_argument, NULL, 1},
+ {"no-slot", no_argument, NULL, 2},
{NULL, 0, NULL, 0}
};
int c;
int option_index;
+ bool no_slot = false;
progname = get_progname(argv[0]);
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_basebackup"));
@@ -2117,7 +2137,16 @@ main(int argc, char **argv)
writerecoveryconf = true;
break;
case 'S':
+
+ /*
+ * When specifying replication slot name, use a permanent
+ * slot.
+ */
replication_slot = pg_strdup(optarg);
+ temp_replication_slot = false;
+ break;
+ case 2:
+ no_slot = true;
break;
case 'T':
tablespace_list_append(optarg);
@@ -2277,7 +2306,7 @@ main(int argc, char **argv)
exit(1);
}
- if (replication_slot && includewal != STREAM_WAL)
+ if ((replication_slot || no_slot) && includewal != STREAM_WAL)
{
fprintf(stderr,
_("%s: replication slots can only be used with WAL streaming\n"),
@@ -2287,6 +2316,20 @@ main(int argc, char **argv)
exit(1);
}
+ if (no_slot)
+ {
+ if (replication_slot)
+ {
+ fprintf(stderr,
+ _("%s: --no-slot cannot be used with slot name\n"),
+ progname);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+ temp_replication_slot = false;
+ }
+
if (strcmp(xlog_dir, "") != 0)
{
if (format != 'p')
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
index b6f57a878c..11c31bbe13 100644
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ b/src/bin/pg_basebackup/pg_receivexlog.c
@@ -41,6 +41,7 @@ static bool do_create_slot = false;
static bool slot_exists_ok = false;
static bool do_drop_slot = false;
static bool synchronous = false;
+static char *replication_slot = NULL;
static void usage(void);
@@ -340,6 +341,8 @@ StreamLog(void)
stream.mark_done = false;
stream.walmethod = CreateWalDirectoryMethod(basedir, stream.do_sync);
stream.partial_suffix = ".partial";
+ stream.replication_slot = replication_slot;
+ stream.temp_slot = false;
ReceiveXlogStream(conn, &stream);
diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c
index 658e2ba91f..d16d08b664 100644
--- a/src/bin/pg_basebackup/pg_recvlogical.c
+++ b/src/bin/pg_basebackup/pg_recvlogical.c
@@ -45,6 +45,7 @@ static bool do_create_slot = false;
static bool slot_exists_ok = false;
static bool do_start_slot = false;
static bool do_drop_slot = false;
+static char *replication_slot = NULL;
/* filled pairwise with option, value. value may be NULL */
static char **options;
diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c
index f771c1ffdc..55612832a6 100644
--- a/src/bin/pg_basebackup/receivelog.c
+++ b/src/bin/pg_basebackup/receivelog.c
@@ -455,10 +455,10 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
* synchronous_standby_names, but we've protected them against it so
* far, so let's continue to do so unless specifically requested.
*/
- if (replication_slot != NULL)
+ if (stream->replication_slot != NULL)
{
reportFlushPosition = true;
- sprintf(slotcmd, "SLOT \"%s\" ", replication_slot);
+ sprintf(slotcmd, "SLOT \"%s\" ", stream->replication_slot);
}
else
{
@@ -508,6 +508,24 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream)
PQclear(res);
}
+ /*
+ * Create temporary replication slot if one is needed
+ */
+ if (stream->temp_slot)
+ {
+ snprintf(query, sizeof(query),
+ "CREATE_REPLICATION_SLOT \"%s\" TEMPORARY PHYSICAL RESERVE_WAL",
+ stream->replication_slot);
+ res = PQexec(conn, query);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ fprintf(stderr, _("%s: could not create temporary replication slot \"%s\": %s"),
+ progname, stream->replication_slot, PQerrorMessage(conn));
+ PQclear(res);
+ return false;
+ }
+ }
+
/*
* initialize flush position to starting point, it's the caller's
* responsibility that that's sane.
diff --git a/src/bin/pg_basebackup/receivelog.h b/src/bin/pg_basebackup/receivelog.h
index e50d62222d..42e93ac745 100644
--- a/src/bin/pg_basebackup/receivelog.h
+++ b/src/bin/pg_basebackup/receivelog.h
@@ -37,13 +37,15 @@ typedef struct StreamCtl
* often */
bool synchronous; /* Flush immediately WAL data on write */
bool mark_done; /* Mark segment as done in generated archive */
- bool do_sync; /* Flush to disk to ensure consistent state
- * of data */
+ bool do_sync; /* Flush to disk to ensure consistent state of
+ * data */
stream_stop_callback stream_stop; /* Stop streaming when returns true */
WalWriteMethod *walmethod; /* How to write the WAL */
char *partial_suffix; /* Suffix appended to partially received files */
+ char *replication_slot; /* Replication slot to use, or NULL */
+ bool temp_slot; /* Create temporary replication slot */
} StreamCtl;
diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c
index 01be3e7c36..31290d35f6 100644
--- a/src/bin/pg_basebackup/streamutil.c
+++ b/src/bin/pg_basebackup/streamutil.c
@@ -38,7 +38,6 @@ char *connection_string = NULL;
char *dbhost = NULL;
char *dbuser = NULL;
char *dbport = NULL;
-char *replication_slot = NULL;
char *dbname = NULL;
int dbgetpassword = 0; /* 0=auto, -1=never, 1=always */
static bool have_password = false;
diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h
index 47ab3df55f..663bfac5cc 100644
--- a/src/bin/pg_basebackup/streamutil.h
+++ b/src/bin/pg_basebackup/streamutil.h
@@ -23,7 +23,6 @@ extern char *dbuser;
extern char *dbport;
extern char *dbname;
extern int dbgetpassword;
-extern char *replication_slot;
/* Connection kept global so we can disconnect easily */
extern PGconn *conn;
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index 4c6670ce72..2c5a3658d5 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -4,7 +4,7 @@ use Cwd;
use Config;
use PostgresNode;
use TestLib;
-use Test::More tests => 71;
+use Test::More tests => 72;
program_help_ok('pg_basebackup');
program_version_ok('pg_basebackup');
@@ -244,6 +244,9 @@ $node->command_ok(
[ 'pg_basebackup', '-D', "$tempdir/backupxst", '-X', 'stream', '-Ft' ],
'pg_basebackup -X stream runs in tar mode');
ok(-f "$tempdir/backupxst/pg_wal.tar", "tar file was created");
+$node->command_ok(
+ [ 'pg_basebackup', '-D', "$tempdir/backupnoslot", '-X', 'stream', '--no-slot' ],
+ 'pg_basebackup -X stream runs with --no-slot');
$node->command_fails(
[ 'pg_basebackup', '-D', "$tempdir/fail", '-S', 'slot1' ],
--
cgit v1.2.3
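How the three slot behaviors look from the command line (slot and directory names are illustrative; -S expects the named slot to exist already):

    # Server >= 10: streams WAL through a temporary slot named
    # pg_basebackup_<pid>, dropped automatically at disconnect:
    pg_basebackup -D /tmp/backup1 -X stream
    # A name given with -S selects a permanent slot instead:
    pg_basebackup -D /tmp/backup2 -X stream -S myslot
    # Suppress slot use entirely, e.g. when no free slots remain:
    pg_basebackup -D /tmp/backup3 -X stream --no-slot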
From cada1af31d769a6b607018d68894f2c879ff275f Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Tue, 17 Jan 2017 12:10:26 +0100
Subject: Add compression support to pg_receivexlog
Author: Michael Paquier, review and small changes by me
---
doc/src/sgml/ref/pg_receivexlog.sgml | 13 +++++
src/bin/pg_basebackup/pg_basebackup.c | 2 +-
src/bin/pg_basebackup/pg_receivexlog.c | 104 +++++++++++++++++++++++++++++++--
src/bin/pg_basebackup/walmethods.c | 95 +++++++++++++++++++++++++-----
src/bin/pg_basebackup/walmethods.h | 3 +-
5 files changed, 197 insertions(+), 20 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pg_receivexlog.sgml b/doc/src/sgml/ref/pg_receivexlog.sgml
index bfa055b58b..8c1ea9a2e2 100644
--- a/doc/src/sgml/ref/pg_receivexlog.sgml
+++ b/doc/src/sgml/ref/pg_receivexlog.sgml
@@ -180,6 +180,19 @@ PostgreSQL documentation
+
+
+
+ -Z level
+ --compress=level
+
+
+ Enables gzip compression of transaction logs, and specifies the
+ compression level (0 through 9, 0 being no compression and 9 being best
+ compression). The suffix .gz will
+ automatically be added to all filenames.
+
+
+
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index c5ae1cc147..ce1fe3bf00 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -494,7 +494,7 @@ LogStreamerMain(logstreamer_param *param)
stream.replication_slot = psprintf("pg_basebackup_%d", (int) getpid());
if (format == 'p')
- stream.walmethod = CreateWalDirectoryMethod(param->xlog, do_sync);
+ stream.walmethod = CreateWalDirectoryMethod(param->xlog, 0, do_sync);
else
stream.walmethod = CreateWalTarMethod(param->xlog, compresslevel, do_sync);
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
index 11c31bbe13..135e2070f3 100644
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ b/src/bin/pg_basebackup/pg_receivexlog.c
@@ -34,6 +34,7 @@
/* Global options */
static char *basedir = NULL;
static int verbose = 0;
+static int compresslevel = 0;
static int noloop = 0;
static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
static volatile bool time_to_abort = false;
@@ -58,6 +59,15 @@ static bool stop_streaming(XLogRecPtr segendpos, uint32 timeline,
exit(code); \
}
+/* Routines to evaluate segment file format */
+#define IsCompressXLogFileName(fname) \
+ (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz") && \
+ strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \
+ strcmp((fname) + XLOG_FNAME_LEN, ".gz") == 0)
+#define IsPartialCompressXLogFileName(fname) \
+ (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.partial") && \
+ strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \
+ strcmp((fname) + XLOG_FNAME_LEN, ".gz.partial") == 0)
static void
usage(void)
@@ -76,6 +86,7 @@ usage(void)
printf(_(" --synchronous flush transaction log immediately after writing\n"));
printf(_(" -v, --verbose output verbose messages\n"));
printf(_(" -V, --version output version information, then exit\n"));
+ printf(_(" -Z, --compress=0-9 compress logs with given compression level\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nConnection options:\n"));
printf(_(" -d, --dbname=CONNSTR connection string\n"));
@@ -188,14 +199,31 @@ FindStreamingStart(uint32 *tli)
uint32 tli;
XLogSegNo segno;
bool ispartial;
+ bool iscompress;
/*
* Check if the filename looks like an xlog file, or a .partial file.
*/
if (IsXLogFileName(dirent->d_name))
+ {
ispartial = false;
+ iscompress = false;
+ }
else if (IsPartialXLogFileName(dirent->d_name))
+ {
+ ispartial = true;
+ iscompress = false;
+ }
+ else if (IsCompressXLogFileName(dirent->d_name))
+ {
+ ispartial = false;
+ iscompress = true;
+ }
+ else if (IsPartialCompressXLogFileName(dirent->d_name))
+ {
ispartial = true;
+ iscompress = true;
+ }
else
continue;
@@ -206,9 +234,15 @@ FindStreamingStart(uint32 *tli)
/*
* Check that the segment has the right size, if it's supposed to be
- * completed.
+ * completed. For non-compressed segments just check the on-disk size
+ * and see if it matches a completed segment.
+ * For compressed segments, look at the last 4 bytes of the compressed
+ * file, which is where the uncompressed size is located for gz files
+ * with a size lower than 4GB, and then compare it to the size of a
+ * completed segment. The last 4 bytes correspond to the ISIZE member
+ * according to http://www.zlib.org/rfc-gzip.html.
*/
- if (!ispartial)
+ if (!ispartial && !iscompress)
{
struct stat statbuf;
char fullpath[MAXPGPATH];
@@ -229,6 +263,47 @@ FindStreamingStart(uint32 *tli)
continue;
}
}
+ else if (!ispartial && iscompress)
+ {
+ int fd;
+ char buf[4];
+ int bytes_out;
+ char fullpath[MAXPGPATH];
+
+ snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
+
+ fd = open(fullpath, O_RDONLY | PG_BINARY);
+ if (fd < 0)
+ {
+ fprintf(stderr, _("%s: could not open compressed file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+ if (lseek(fd, (off_t)(-4), SEEK_END) < 0)
+ {
+ fprintf(stderr, _("%s: could not seek compressed file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+ if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+ {
+ fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+
+ close(fd);
+ bytes_out = (buf[3] << 24) | (buf[2] << 16) |
+ (buf[1] << 8) | buf[0];
+
+ if (bytes_out != XLOG_SEG_SIZE)
+ {
+ fprintf(stderr,
+ _("%s: compressed segment file \"%s\" has incorrect uncompressed size %d, skipping\n"),
+ progname, dirent->d_name, bytes_out);
+ continue;
+ }
+ }
/* Looks like a valid segment. Remember that we saw it. */
if ((segno > high_segno) ||
@@ -339,7 +414,8 @@ StreamLog(void)
stream.synchronous = synchronous;
stream.do_sync = true;
stream.mark_done = false;
- stream.walmethod = CreateWalDirectoryMethod(basedir, stream.do_sync);
+ stream.walmethod = CreateWalDirectoryMethod(basedir, compresslevel,
+ stream.do_sync);
stream.partial_suffix = ".partial";
stream.replication_slot = replication_slot;
stream.temp_slot = false;
@@ -392,6 +468,7 @@ main(int argc, char **argv)
{"status-interval", required_argument, NULL, 's'},
{"slot", required_argument, NULL, 'S'},
{"verbose", no_argument, NULL, 'v'},
+ {"compress", required_argument, NULL, 'Z'},
/* action */
{"create-slot", no_argument, NULL, 1},
{"drop-slot", no_argument, NULL, 2},
@@ -422,7 +499,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "D:d:h:p:U:s:S:nwWv",
+ while ((c = getopt_long(argc, argv, "D:d:h:p:U:s:S:nwWvZ:",
long_options, &option_index)) != -1)
{
switch (c)
@@ -472,6 +549,15 @@ main(int argc, char **argv)
case 'v':
verbose++;
break;
+ case 'Z':
+ compresslevel = atoi(optarg);
+ if (compresslevel < 0 || compresslevel > 9)
+ {
+ fprintf(stderr, _("%s: invalid compression level \"%s\"\n"),
+ progname, optarg);
+ exit(1);
+ }
+ break;
/* action */
case 1:
do_create_slot = true;
@@ -538,6 +624,16 @@ main(int argc, char **argv)
exit(1);
}
+#ifndef HAVE_LIBZ
+ if (compresslevel != 0)
+ {
+ fprintf(stderr,
+ _("%s: this build does not support compression\n"),
+ progname);
+ exit(1);
+ }
+#endif
+
/*
* Check existence of destination folder.
*/
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index 88ee603b8b..d9ad596bf0 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -41,6 +41,7 @@
typedef struct DirectoryMethodData
{
char *basedir;
+ int compression;
bool sync;
} DirectoryMethodData;
static DirectoryMethodData *dir_data = NULL;
@@ -55,6 +56,9 @@ typedef struct DirectoryMethodFile
char *pathname;
char *fullpath;
char *temp_suffix;
+#ifdef HAVE_LIBZ
+ gzFile gzfp;
+#endif
} DirectoryMethodFile;
static char *
@@ -70,17 +74,47 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
static char tmppath[MAXPGPATH];
int fd;
DirectoryMethodFile *f;
+#ifdef HAVE_LIBZ
+ gzFile gzfp = NULL;
+#endif
- snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
- dir_data->basedir, pathname, temp_suffix ? temp_suffix : "");
+ snprintf(tmppath, sizeof(tmppath), "%s/%s%s%s",
+ dir_data->basedir, pathname,
+ dir_data->compression > 0 ? ".gz" : "",
+ temp_suffix ? temp_suffix : "");
+ /*
+ * Open a file for non-compressed as well as compressed files. Tracking
+ * the file descriptor is important for dir_sync() method as gzflush()
+ * does not do any system calls to fsync() to make changes permanent on
+ * disk.
+ */
fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, S_IRUSR | S_IWUSR);
if (fd < 0)
return NULL;
- if (pad_to_size)
+#ifdef HAVE_LIBZ
+ if (dir_data->compression > 0)
+ {
+ gzfp = gzdopen(fd, "wb");
+ if (gzfp == NULL)
+ {
+ close(fd);
+ return NULL;
+ }
+
+ if (gzsetparams(gzfp, dir_data->compression,
+ Z_DEFAULT_STRATEGY) != Z_OK)
+ {
+ gzclose(gzfp);
+ return NULL;
+ }
+ }
+#endif
+
+ /* Do pre-padding on non-compressed files */
+ if (pad_to_size && dir_data->compression == 0)
{
- /* Always pre-pad on regular files */
char *zerobuf;
int bytes;
@@ -120,12 +154,21 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
if (fsync_fname(tmppath, false, progname) != 0 ||
fsync_parent_path(tmppath, progname) != 0)
{
- close(fd);
+#ifdef HAVE_LIBZ
+ if (dir_data->compression > 0)
+ gzclose(gzfp);
+ else
+#endif
+ close(fd);
return NULL;
}
}
f = pg_malloc0(sizeof(DirectoryMethodFile));
+#ifdef HAVE_LIBZ
+ if (dir_data->compression > 0)
+ f->gzfp = gzfp;
+#endif
f->fd = fd;
f->currpos = 0;
f->pathname = pg_strdup(pathname);
@@ -144,7 +187,12 @@ dir_write(Walfile f, const void *buf, size_t count)
Assert(f != NULL);
- r = write(df->fd, buf, count);
+#ifdef HAVE_LIBZ
+ if (dir_data->compression > 0)
+ r = (ssize_t) gzwrite(df->gzfp, buf, count);
+ else
+#endif
+ r = write(df->fd, buf, count);
if (r > 0)
df->currpos += r;
return r;
@@ -169,7 +217,12 @@ dir_close(Walfile f, WalCloseMethod method)
Assert(f != NULL);
- r = close(df->fd);
+#ifdef HAVE_LIBZ
+ if (dir_data->compression > 0)
+ r = gzclose(df->gzfp);
+ else
+#endif
+ r = close(df->fd);
if (r == 0)
{
@@ -180,17 +233,22 @@ dir_close(Walfile f, WalCloseMethod method)
* If we have a temp prefix, normal operation is to rename the
* file.
*/
- snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
- dir_data->basedir, df->pathname, df->temp_suffix);
- snprintf(tmppath2, sizeof(tmppath2), "%s/%s",
- dir_data->basedir, df->pathname);
+ snprintf(tmppath, sizeof(tmppath), "%s/%s%s%s",
+ dir_data->basedir, df->pathname,
+ dir_data->compression > 0 ? ".gz" : "",
+ df->temp_suffix);
+ snprintf(tmppath2, sizeof(tmppath2), "%s/%s%s",
+ dir_data->basedir, df->pathname,
+ dir_data->compression > 0 ? ".gz" : "");
r = durable_rename(tmppath, tmppath2, progname);
}
else if (method == CLOSE_UNLINK)
{
/* Unlink the file once it's closed */
- snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
- dir_data->basedir, df->pathname, df->temp_suffix ? df->temp_suffix : "");
+ snprintf(tmppath, sizeof(tmppath), "%s/%s%s%s",
+ dir_data->basedir, df->pathname,
+ dir_data->compression > 0 ? ".gz" : "",
+ df->temp_suffix ? df->temp_suffix : "");
r = unlink(tmppath);
}
else
@@ -226,6 +284,14 @@ dir_sync(Walfile f)
if (!dir_data->sync)
return 0;
+#ifdef HAVE_LIBZ
+ if (dir_data->compression > 0)
+ {
+ if (gzflush(((DirectoryMethodFile *) f)->gzfp, Z_SYNC_FLUSH) != Z_OK)
+ return -1;
+ }
+#endif
+
return fsync(((DirectoryMethodFile *) f)->fd);
}
@@ -277,7 +343,7 @@ dir_finish(void)
WalWriteMethod *
-CreateWalDirectoryMethod(const char *basedir, bool sync)
+CreateWalDirectoryMethod(const char *basedir, int compression, bool sync)
{
WalWriteMethod *method;
@@ -293,6 +359,7 @@ CreateWalDirectoryMethod(const char *basedir, bool sync)
method->getlasterror = dir_getlasterror;
dir_data = pg_malloc0(sizeof(DirectoryMethodData));
+ dir_data->compression = compression;
dir_data->basedir = pg_strdup(basedir);
dir_data->sync = sync;
diff --git a/src/bin/pg_basebackup/walmethods.h b/src/bin/pg_basebackup/walmethods.h
index c1723d53b5..2cd8b6d755 100644
--- a/src/bin/pg_basebackup/walmethods.h
+++ b/src/bin/pg_basebackup/walmethods.h
@@ -41,7 +41,8 @@ struct WalWriteMethod
* (only implements the methods required for pg_basebackup,
* not all those required for pg_receivexlog)
*/
-WalWriteMethod *CreateWalDirectoryMethod(const char *basedir, bool sync);
+WalWriteMethod *CreateWalDirectoryMethod(const char *basedir,
+ int compression, bool sync);
WalWriteMethod *CreateWalTarMethod(const char *tarbase, int compression, bool sync);
/* Cleanup routines for previously-created methods */
--
cgit v1.2.3
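A usage sketch for the new flag, plus a spot-check of the gzip ISIZE trailer that FindStreamingStart() relies on (the filename is illustrative; assumes 16MB segments and a little-endian machine):

    # Stream WAL into ./archive, gzip-compressing segments at level 5:
    pg_receivexlog -D ./archive -Z 5
    # A completed segment's last 4 bytes hold its uncompressed size;
    # for a full segment this should print 16777216:
    tail -c 4 ./archive/000000010000000000000001.gz | od -A n -t u4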
From 6181c34da882544c4842e7442189a7ea1dd03032 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Tue, 17 Jan 2017 12:00:00 -0500
Subject: doc: Update URL for Microsoft download site
---
doc/src/sgml/install-windows.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/install-windows.sgml b/doc/src/sgml/install-windows.sgml
index 20fc47ae5f..ecec0a60c7 100644
--- a/doc/src/sgml/install-windows.sgml
+++ b/doc/src/sgml/install-windows.sgml
@@ -169,7 +169,7 @@ $ENV{MSBFLAGS}="/m";
Microsoft Windows SDK it
is recommended that you upgrade to the latest version (currently
version 7.1), available for download from
- >.
+ >.
You must always include the
--
cgit v1.2.3
From aa17c06fb58533d09c79c68a4d34a6f56687ee38 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 18 Jan 2017 12:00:00 -0500
Subject: Add function to import operating system collations
Move this logic out of initdb into a user-callable function. This
simplifies the code and makes it possible to update the standard
collations later on if additional operating system collations appear.
Reviewed-by: Andres Freund
Reviewed-by: Euler Taveira
---
doc/src/sgml/charset.sgml | 2 +-
doc/src/sgml/func.sgml | 40 ++++++++
src/backend/catalog/pg_collation.c | 31 ++++++-
src/backend/commands/collationcmds.c | 154 ++++++++++++++++++++++++++++++-
src/bin/initdb/initdb.c | 166 +---------------------------------
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_collation_fn.h | 3 +-
src/include/catalog/pg_proc.h | 3 +
8 files changed, 229 insertions(+), 172 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index f8c7ac3b16..2aba0fc528 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
-
+ Managing Collations
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 10e31868ba..eb1b6984bf 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
in the database's default tablespace, the tablespace can be specified as 0.
+
+ lists functions used to manage
+ collations.
+
+
+
+
+
+ pg_import_system_collations> populates the system
+ catalog pg_collation with collations based on all the
+ locales it finds on the operating system. This is
+ what initdb uses;
+ see for more details. If additional
+ locales are installed into the operating system later on, this function
+ can be run again to add collations for the new locales. In that case, the
+ parameter if_not_exists should be set to true to
+ skip over existing collations. The schema
+ parameter would typically be pg_catalog, but that is
+ not a requirement. (Collation objects based on locales that are no longer
+ present on the operating system are never removed by this function.)
+
+
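A call sketch matching the documentation just added (run as a superuser; per this patch the arguments are if_not_exists and a target schema):

    # After installing new OS locales, import any missing collations:
    psql -c "SELECT pg_import_system_collations(true, 'pg_catalog'::regnamespace);"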
diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index fa42ad5ec1..694c0f67f5 100644
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -41,7 +41,8 @@ Oid
CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
int32 collencoding,
- const char *collcollate, const char *collctype)
+ const char *collcollate, const char *collctype,
+ bool if_not_exists)
{
Relation rel;
TupleDesc tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
PointerGetDatum(collname),
Int32GetDatum(collencoding),
ObjectIdGetDatum(collnamespace)))
- ereport(ERROR,
+ {
+ if (if_not_exists)
+ {
+ ereport(NOTICE,
(errcode(ERRCODE_DUPLICATE_OBJECT),
- errmsg("collation \"%s\" for encoding \"%s\" already exists",
+ errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
collname, pg_encoding_to_char(collencoding))));
+ return InvalidOid;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("collation \"%s\" for encoding \"%s\" already exists",
+ collname, pg_encoding_to_char(collencoding))));
+ }
/*
* Also forbid matching an any-encoding entry. This test of course is not
@@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
PointerGetDatum(collname),
Int32GetDatum(-1),
ObjectIdGetDatum(collnamespace)))
- ereport(ERROR,
+ {
+ if (if_not_exists)
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("collation \"%s\" already exists, skipping",
+ collname)));
+ return InvalidOid;
+ }
+ else
+ ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" already exists",
collname)));
+ }
/* open pg_collation */
rel = heap_open(CollationRelationId, RowExclusiveLock);
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index ccadfc2e47..5cb3e2bb28 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
GetUserId(),
GetDatabaseEncoding(),
collcollate,
- collctype);
+ collctype,
+ false);
+
+ if (!OidIsValid(newoid))
+ return InvalidObjectAddress;
ObjectAddressSet(address, CollationRelationId, newoid);
@@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
errmsg("collation \"%s\" already exists in schema \"%s\"",
collname, get_namespace_name(nspOid))));
}
+
+
+/*
+ * "Normalize" a locale name, stripping off encoding tags such as
+ * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
+ * -> "br_FR@euro"). Return true if a new, different name was
+ * generated.
+ */
+pg_attribute_unused()
+static bool
+normalize_locale_name(char *new, const char *old)
+{
+ char *n = new;
+ const char *o = old;
+ bool changed = false;
+
+ while (*o)
+ {
+ if (*o == '.')
+ {
+ /* skip over encoding tag such as ".utf8" or ".UTF-8" */
+ o++;
+ while ((*o >= 'A' && *o <= 'Z')
+ || (*o >= 'a' && *o <= 'z')
+ || (*o >= '0' && *o <= '9')
+ || (*o == '-'))
+ o++;
+ changed = true;
+ }
+ else
+ *n++ = *o++;
+ }
+ *n = '\0';
+
+ return changed;
+}
+
+
+Datum
+pg_import_system_collations(PG_FUNCTION_ARGS)
+{
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+ bool if_not_exists = PG_GETARG_BOOL(0);
+ Oid nspid = PG_GETARG_OID(1);
+
+ FILE *locale_a_handle;
+ char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
+ int count = 0;
+#endif
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to import system collations"))));
+
+#if defined(HAVE_LOCALE_T) && !defined(WIN32)
+ locale_a_handle = OpenPipeStream("locale -a", "r");
+ if (locale_a_handle == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not execute command \"%s\": %m",
+ "locale -a")));
+
+ while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
+ {
+ int i;
+ size_t len;
+ int enc;
+ bool skip;
+ char alias[NAMEDATALEN];
+
+ len = strlen(localebuf);
+
+ if (len == 0 || localebuf[len - 1] != '\n')
+ {
+ elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
+ continue;
+ }
+ localebuf[len - 1] = '\0';
+
+ /*
+ * Some systems have locale names that don't consist entirely of ASCII
+ * letters (such as "bokmål" or "français"). This is
+ * pretty silly, since we need the locale itself to interpret the
+ * non-ASCII characters. We can't do much with those, so we filter
+ * them out.
+ */
+ skip = false;
+ for (i = 0; i < len; i++)
+ {
+ if (IS_HIGHBIT_SET(localebuf[i]))
+ {
+ skip = true;
+ break;
+ }
+ }
+ if (skip)
+ {
+ elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
+ continue;
+ }
+
+ enc = pg_get_encoding_from_locale(localebuf, false);
+ if (enc < 0)
+ {
+ /* error message printed by pg_get_encoding_from_locale() */
+ continue;
+ }
+ if (!PG_VALID_BE_ENCODING(enc))
+ continue; /* ignore locales for client-only encodings */
+ if (enc == PG_SQL_ASCII)
+ continue; /* C/POSIX are already in the catalog */
+
+ count++;
+
+ CollationCreate(localebuf, nspid, GetUserId(), enc,
+ localebuf, localebuf, if_not_exists);
+
+ CommandCounterIncrement();
+
+ /*
+ * Generate aliases such as "en_US" in addition to "en_US.utf8" for
+ * ease of use. Note that collation names are unique per encoding
+ * only, so this doesn't clash with "en_US" for LATIN1, say.
+ *
+ * This always runs in "if not exists" mode, to skip aliases that
+ * conflict with an existing locale name for the same encoding. For
+ * example, "br_FR.iso88591" is normalized to "br_FR", both for
+ * encoding LATIN1. But the unnormalized locale "br_FR" already
+ * exists for LATIN1.
+ */
+ if (normalize_locale_name(alias, localebuf))
+ {
+ CollationCreate(alias, nspid, GetUserId(), enc,
+ localebuf, localebuf, true);
+ CommandCounterIncrement();
+ }
+ }
+
+ ClosePipeStream(locale_a_handle);
+
+ if (count == 0)
+ ereport(ERROR,
+ (errmsg("no usable system locales were found")));
+#endif /* not HAVE_LOCALE_T && not WIN32 */
+
+ PG_RETURN_VOID();
+}
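A hedged illustration of the alias generation implemented above, assuming the
operating system provides an en_US.utf8 locale: after an import, both the
exact locale name and its normalized alias should appear in pg_collation,
referring to the same OS locale:

    SELECT collname, collcollate
      FROM pg_collation
     WHERE collcollate = 'en_US.utf8';
    --   collname   | collcollate
    --  ------------+-------------
    --   en_US.utf8 | en_US.utf8
    --   en_US      | en_US.utf8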
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 1e7d677244..eb1be100c8 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1608,178 +1608,16 @@ setup_description(FILE *cmdfd)
PG_CMD_PUTS("DROP TABLE tmp_pg_shdescription;\n\n");
}
-#ifdef HAVE_LOCALE_T
-/*
- * "Normalize" a locale name, stripping off encoding tags such as
- * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
- * -> "br_FR@euro"). Return true if a new, different name was
- * generated.
- */
-static bool
-normalize_locale_name(char *new, const char *old)
-{
- char *n = new;
- const char *o = old;
- bool changed = false;
-
- while (*o)
- {
- if (*o == '.')
- {
- /* skip over encoding tag such as ".utf8" or ".UTF-8" */
- o++;
- while ((*o >= 'A' && *o <= 'Z')
- || (*o >= 'a' && *o <= 'z')
- || (*o >= '0' && *o <= '9')
- || (*o == '-'))
- o++;
- changed = true;
- }
- else
- *n++ = *o++;
- }
- *n = '\0';
-
- return changed;
-}
-#endif /* HAVE_LOCALE_T */
-
/*
* populate pg_collation
*/
static void
setup_collation(FILE *cmdfd)
{
-#if defined(HAVE_LOCALE_T) && !defined(WIN32)
- int i;
- FILE *locale_a_handle;
- char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
- int count = 0;
-
- locale_a_handle = popen_check("locale -a", "r");
- if (!locale_a_handle)
- return; /* complaint already printed */
-
- PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
- " collname name, "
- " locale name, "
- " encoding int) WITHOUT OIDS;\n\n");
-
- while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
- {
- size_t len;
- int enc;
- bool skip;
- char *quoted_locale;
- char alias[NAMEDATALEN];
-
- len = strlen(localebuf);
-
- if (len == 0 || localebuf[len - 1] != '\n')
- {
- if (debug)
- fprintf(stderr, _("%s: locale name too long, skipped: \"%s\"\n"),
- progname, localebuf);
- continue;
- }
- localebuf[len - 1] = '\0';
-
- /*
- * Some systems have locale names that don't consist entirely of ASCII
- * letters (such as "bokmål" or "français"). This is
- * pretty silly, since we need the locale itself to interpret the
- * non-ASCII characters. We can't do much with those, so we filter
- * them out.
- */
- skip = false;
- for (i = 0; i < len; i++)
- {
- if (IS_HIGHBIT_SET(localebuf[i]))
- {
- skip = true;
- break;
- }
- }
- if (skip)
- {
- if (debug)
- fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: \"%s\"\n"),
- progname, localebuf);
- continue;
- }
-
- enc = pg_get_encoding_from_locale(localebuf, debug);
- if (enc < 0)
- {
- /* error message printed by pg_get_encoding_from_locale() */
- continue;
- }
- if (!PG_VALID_BE_ENCODING(enc))
- continue; /* ignore locales for client-only encodings */
- if (enc == PG_SQL_ASCII)
- continue; /* C/POSIX are already in the catalog */
-
- count++;
-
- quoted_locale = escape_quotes(localebuf);
-
- PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
- quoted_locale, quoted_locale, enc);
-
- /*
- * Generate aliases such as "en_US" in addition to "en_US.utf8" for
- * ease of use. Note that collation names are unique per encoding
- * only, so this doesn't clash with "en_US" for LATIN1, say.
- */
- if (normalize_locale_name(alias, localebuf))
- {
- char *quoted_alias = escape_quotes(alias);
-
- PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n\n",
- quoted_alias, quoted_locale, enc);
- free(quoted_alias);
- }
- free(quoted_locale);
- }
+ PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n");
/* Add an SQL-standard name */
- PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n\n", PG_UTF8);
-
- /*
- * When copying collations to the final location, eliminate aliases that
- * conflict with an existing locale name for the same encoding. For
- * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
- * LATIN1. But the unnormalized locale "br_FR" already exists for LATIN1.
- * Prefer the alias that matches the OS locale name, else the first locale
- * name by sort order (arbitrary choice to be deterministic).
- *
- * Also, eliminate any aliases that conflict with pg_collation's
- * hard-wired entries for "C" etc.
- */
- PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
- " SELECT DISTINCT ON (collname, encoding)"
- " collname, "
- " (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
- " (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
- " encoding, locale, locale "
- " FROM tmp_pg_collation"
- " WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
- " ORDER BY collname, encoding, (collname = locale) DESC, locale;\n\n");
-
- /*
- * Even though the table is temp, drop it explicitly so it doesn't get
- * copied into template0/postgres databases.
- */
- PG_CMD_PUTS("DROP TABLE tmp_pg_collation;\n\n");
-
- pclose(locale_a_handle);
-
- if (count == 0 && !debug)
- {
- printf(_("No usable system locales were found.\n"));
- printf(_("Use the option \"--debug\" to see details.\n"));
- }
-#endif /* not HAVE_LOCALE_T && not WIN32 */
+ PG_CMD_PRINTF2("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, '%s'::regrole, %d, 'C', 'C');\n\n", escape_quotes(username), PG_UTF8);
}
/*
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 54211f5618..7d33f39bea 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201701172
+#define CATALOG_VERSION_NO 201701181
#endif
diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h
index 1ea757f150..482ba7920e 100644
--- a/src/include/catalog/pg_collation_fn.h
+++ b/src/include/catalog/pg_collation_fn.h
@@ -17,7 +17,8 @@
extern Oid CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
int32 collencoding,
- const char *collcollate, const char *collctype);
+ const char *collcollate, const char *collctype,
+ bool if_not_exists);
extern void RemoveCollationById(Oid collationOid);
#endif /* PG_COLLATION_FN_H */
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 42f36891af..1a0eba3ca1 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5349,6 +5349,9 @@ DESCR("pg_controldata recovery state information as a function");
DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,bigint_timestamps,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
DESCR("pg_controldata init state information as a function");
+DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
+DESCR("import collations from operating system");
+
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
--
cgit v1.2.3
From d00ca333c338b40911e89939c3cc771025978595 Mon Sep 17 00:00:00 2001
From: Magnus Hagander
Date: Wed, 18 Jan 2017 21:37:59 +0100
Subject: Implement array version of jsonb_delete and operator
This makes it possible to delete multiple keys from a jsonb value by
passing in an array of text values, which makes the operation much
faster than individually deleting the keys (which would require copying
the jsonb structure over and over again).
Reviewed by Dmitry Dolgov and Michael Paquier
---
doc/src/sgml/func.sgml | 8 ++++
src/backend/utils/adt/jsonfuncs.c | 86 +++++++++++++++++++++++++++++++++++++
src/include/catalog/pg_operator.h | 2 +
src/include/catalog/pg_proc.h | 3 +-
src/test/regress/expected/jsonb.out | 18 ++++++++
src/test/regress/sql/jsonb.sql | 4 ++
6 files changed, 120 insertions(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index eb1b6984bf..2504a466e6 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -10843,6 +10843,14 @@ table2-mapping
on their key value.
'{"a": "b"}'::jsonb - 'a'
+
+ -
+ text[]
+ Delete multiple key/value pairs or string
+ elements from left operand. Key/value pairs are matched based
+ on their key value.
+ '{"a": "b", "c": "d"}'::jsonb - '{a,c}'::text[]
+ -integer
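A quick sketch of the new operator, taken from the regression tests added at
the end of this patch; every key named in the array is removed in a single
pass:

    SELECT '{"a":1, "b":2, "c":3}'::jsonb - '{c,b}'::text[];
    -- {"a": 1}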
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 58c721c074..d624fdbf79 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -3437,6 +3437,92 @@ jsonb_delete(PG_FUNCTION_ARGS)
PG_RETURN_JSONB(JsonbValueToJsonb(res));
}
+/*
+ * SQL function jsonb_delete (jsonb, variadic text[])
+ *
+ * return a copy of the jsonb with the indicated items
+ * removed.
+ */
+Datum
+jsonb_delete_array(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB(0);
+ ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1);
+ Datum *keys_elems;
+ bool *keys_nulls;
+ int keys_len;
+ JsonbParseState *state = NULL;
+ JsonbIterator *it;
+ JsonbValue v,
+ *res = NULL;
+ bool skipNested = false;
+ JsonbIteratorToken r;
+
+ if (ARR_NDIM(keys) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot delete from scalar")));
+
+ if (JB_ROOT_COUNT(in) == 0)
+ PG_RETURN_JSONB(in);
+
+ deconstruct_array(keys, TEXTOID, -1, false, 'i',
+ &keys_elems, &keys_nulls, &keys_len);
+
+ if (keys_len == 0)
+ PG_RETURN_JSONB(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != 0)
+ {
+ skipNested = true;
+
+ if ((r == WJB_ELEM || r == WJB_KEY) && v.type == jbvString)
+ {
+ int i;
+ bool found = false;
+
+ for (i = 0; i < keys_len; i++)
+ {
+ char *keyptr;
+ int keylen;
+
+ if (keys_nulls[i])
+ continue;
+
+ keyptr = VARDATA_ANY(keys_elems[i]);
+ keylen = VARSIZE_ANY_EXHDR(keys_elems[i]);
+ if (keylen == v.val.string.len &&
+ memcmp(keyptr, v.val.string.val, keylen) == 0)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ {
+ /* skip corresponding value as well */
+ if (r == WJB_KEY)
+ JsonbIteratorNext(&it, &v, true);
+
+ continue;
+ }
+ }
+
+ res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB(JsonbValueToJsonb(res));
+}
+
/*
* SQL function jsonb_delete (jsonb, int)
*
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index b1d25b5efb..45feb69b93 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -1826,6 +1826,8 @@ DATA(insert OID = 3284 ( "||" PGNSP PGUID b f f 3802 3802 3802 0 0 jsonb_con
DESCR("concatenate");
DATA(insert OID = 3285 ( "-" PGNSP PGUID b f f 3802 25 3802 0 0 3302 - - ));
DESCR("delete object field");
+DATA(insert OID = 3398 ( "-" PGNSP PGUID b f f 3802 1009 3802 0 0 3343 - -));
+DESCR("delete object fields");
DATA(insert OID = 3286 ( "-" PGNSP PGUID b f f 3802 23 3802 0 0 3303 - - ));
DESCR("delete array element");
DATA(insert OID = 3287 ( "#-" PGNSP PGUID b f f 3802 1009 3802 0 0 jsonb_delete_path - - ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 1a0eba3ca1..03f55a1cc5 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -957,7 +957,7 @@ DESCR("name of the current database");
DATA(insert OID = 817 ( current_query PGNSP PGUID 12 1 0 0 0 f f f f f f v r 0 0 25 "" _null_ _null_ _null_ _null_ _null_ current_query _null_ _null_ _null_ ));
DESCR("get the currently executing query");
-DATA(insert OID = 3343 ( int8_mul_cash PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 790 "20 790" _null_ _null_ _null_ _null_ _null_ int8_mul_cash _null_ _null_ _null_ ));
+DATA(insert OID = 3399 ( int8_mul_cash PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 790 "20 790" _null_ _null_ _null_ _null_ _null_ int8_mul_cash _null_ _null_ _null_ ));
DATA(insert OID = 862 ( int4_mul_cash PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 790 "23 790" _null_ _null_ _null_ _null_ _null_ int4_mul_cash _null_ _null_ _null_ ));
DATA(insert OID = 863 ( int2_mul_cash PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 790 "21 790" _null_ _null_ _null_ _null_ _null_ int2_mul_cash _null_ _null_ _null_ ));
DATA(insert OID = 3344 ( cash_mul_int8 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 790 "790 20" _null_ _null_ _null_ _null_ _null_ cash_mul_int8 _null_ _null_ _null_ ));
@@ -4903,6 +4903,7 @@ DESCR("GIN support");
DATA(insert OID = 3301 ( jsonb_concat PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3802 "3802 3802" _null_ _null_ _null_ _null_ _null_ jsonb_concat _null_ _null_ _null_ ));
DATA(insert OID = 3302 ( jsonb_delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3802 "3802 25" _null_ _null_ _null_ _null_ _null_ jsonb_delete _null_ _null_ _null_ ));
DATA(insert OID = 3303 ( jsonb_delete PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3802 "3802 23" _null_ _null_ _null_ _null_ _null_ jsonb_delete_idx _null_ _null_ _null_ ));
+DATA(insert OID = 3343 ( jsonb_delete PGNSP PGUID 12 1 0 25 0 f f f f t f i s 2 0 3802 "3802 1009" "{3802,1009}" "{i,v}" "{from_json,path_elems}" _null_ _null_ jsonb_delete_array _null_ _null_ _null_ ));
DATA(insert OID = 3304 ( jsonb_delete_path PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3802 "3802 1009" _null_ _null_ _null_ _null_ _null_ jsonb_delete_path _null_ _null_ _null_ ));
DATA(insert OID = 3305 ( jsonb_set PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 3802 "3802 1009 3802 16" _null_ _null_ _null_ _null_ _null_ jsonb_set _null_ _null_ _null_ ));
DESCR("Set part of a jsonb");
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index e2cb08a6fb..ba9b1d711e 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -3095,6 +3095,24 @@ select '["a","b","c"]'::jsonb - -4;
["a", "b", "c"]
(1 row)
+select '{"a":1 , "b":2, "c":3}'::jsonb - '{b}'::text[];
+ ?column?
+------------------
+ {"a": 1, "c": 3}
+(1 row)
+
+select '{"a":1 , "b":2, "c":3}'::jsonb - '{c,b}'::text[];
+ ?column?
+----------
+ {"a": 1}
+(1 row)
+
+select '{"a":1 , "b":2, "c":3}'::jsonb - '{}'::text[];
+ ?column?
+--------------------------
+ {"a": 1, "b": 2, "c": 3}
+(1 row)
+
select jsonb_set('{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb, '{n}', '[1,2,3]');
jsonb_set
--------------------------------------------------------------------------
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 6b4c796992..eb65a38197 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -777,6 +777,10 @@ select '["a","b","c"]'::jsonb - -2;
select '["a","b","c"]'::jsonb - -3;
select '["a","b","c"]'::jsonb - -4;
+select '{"a":1 , "b":2, "c":3}'::jsonb - '{b}'::text[];
+select '{"a":1 , "b":2, "c":3}'::jsonb - '{c,b}'::text[];
+select '{"a":1 , "b":2, "c":3}'::jsonb - '{}'::text[];
+
select jsonb_set('{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb, '{n}', '[1,2,3]');
select jsonb_set('{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb, '{b,-1}', '[1,2,3]');
select jsonb_set('{"n":null, "a":1, "b":[1,2], "c":{"1":2}, "d":{"1":[2,3]}}'::jsonb, '{d,1,0}', '[1,2,3]');
--
cgit v1.2.3
From 69f4b9c85f168ae006929eec44fc44d569e846b9 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Wed, 18 Jan 2017 12:46:50 -0800
Subject: Move targetlist SRF handling from expression evaluation to new
executor node.
Evaluation of set-returning functions (SRFs) in the targetlist (like SELECT
generate_series(1,5)) was so far done in the expression evaluation (i.e.
ExecEvalExpr()) and projection (i.e. ExecProject/ExecTargetList) code.
This meant that most executor nodes performing projection, and most
expression evaluation functions, had to deal with the possibility that an
evaluated expression could return a set of values.
That's bad because it leads to repeated code in a lot of places. It also,
and that's my (Andres's) motivation, made it a lot harder to implement a
more efficient way of doing expression evaluation.
To fix this, introduce a new executor node (ProjectSet) that can evaluate
targetlists containing one or more SRFs. To avoid the complexity of the old
way of handling nested expressions returning sets (e.g. having to pass up
ExprDoneCond, and dealing with arguments to functions returning sets etc.),
those SRFs can only be at the top level of the node's targetlist. The
planner makes sure (via split_pathtarget_at_srfs()) that SRF evaluation is
only necessary in ProjectSet nodes and that SRFs are only present at the
top level of the node's targetlist. If there are nested SRFs the planner
creates multiple stacked ProjectSet nodes. The ProjectSet nodes always get
input from an underlying node.
We also discussed and prototyped evaluating targetlist SRFs using ROWS
FROM(), but that turned out to be more complicated than we'd hoped.
While moving SRF evaluation to ProjectSet would have allowed retaining the
old "least common multiple" behavior when multiple SRFs are present in one
targetlist (i.e. continue returning rows until all SRFs are at the end of
their input at the same time), we decided instead to return rows only until
all SRFs are exhausted, returning NULL for already-exhausted ones. We
deemed the previous behavior to be too confusing, unexpected, and actually
not particularly useful.
As a side effect, the previously prohibited case of multiple set-returning
arguments to a function is now allowed. Not because it's particularly
desirable, but because it ends up working and there seems to be no argument
for adding code to prohibit it.
Currently the behavior for COALESCE and CASE containing SRFs has changed:
they return multiple rows from the expression, even when the SRF-containing
"arm" of the expression is not evaluated. That's because the SRFs are
evaluated in a separate ProjectSet node. As that's quite confusing, we're
likely to instead prohibit SRFs in those places. But that's still being
discussed, and the code would reside in places not touched here, so that's
a task for later.
There's a lot of now-superfluous code dealing with set-returning expressions
around. But as the changes to get rid of those are verbose and largely boring,
it seems better for readability to keep the cleanup as a separate commit.
Author: Tom Lane and Andres Freund
Discussion: https://postgr.es/m/20160822214023.aaxz5l4igypowyri@alap3.anarazel.de
---
doc/src/sgml/xfunc.sgml | 52 ++++--
src/backend/commands/explain.c | 3 +
src/backend/executor/Makefile | 5 +-
src/backend/executor/execAmi.c | 5 +
src/backend/executor/execProcnode.c | 16 +-
src/backend/executor/execQual.c | 154 ++++++++--------
src/backend/executor/nodeProjectSet.c | 300 +++++++++++++++++++++++++++++++
src/backend/nodes/copyfuncs.c | 19 ++
src/backend/nodes/outfuncs.c | 24 +++
src/backend/nodes/readfuncs.c | 15 ++
src/backend/optimizer/README | 1 +
src/backend/optimizer/path/allpaths.c | 4 +
src/backend/optimizer/plan/createplan.c | 68 +++++++
src/backend/optimizer/plan/planner.c | 219 ++++++++++++++++++----
src/backend/optimizer/plan/setrefs.c | 3 +
src/backend/optimizer/plan/subselect.c | 2 +
src/backend/optimizer/util/clauses.c | 104 ++---------
src/backend/optimizer/util/pathnode.c | 66 +++++++
src/backend/optimizer/util/tlist.c | 199 ++++++++++++++++++++
src/include/executor/executor.h | 4 +
src/include/executor/nodeProjectSet.h | 24 +++
src/include/nodes/execnodes.h | 20 ++-
src/include/nodes/nodes.h | 3 +
src/include/nodes/plannodes.h | 11 ++
src/include/nodes/relation.h | 11 ++
src/include/optimizer/clauses.h | 1 -
src/include/optimizer/pathnode.h | 4 +
src/include/optimizer/tlist.h | 3 +
src/test/regress/expected/aggregates.out | 5 +-
src/test/regress/expected/limit.out | 22 ++-
src/test/regress/expected/portals.out | 16 +-
src/test/regress/expected/rangefuncs.out | 10 +-
src/test/regress/expected/subselect.out | 43 +++--
src/test/regress/expected/tsrf.out | 19 +-
src/test/regress/expected/union.out | 5 +-
35 files changed, 1186 insertions(+), 274 deletions(-)
create mode 100644 src/backend/executor/nodeProjectSet.c
create mode 100644 src/include/executor/nodeProjectSet.h
(limited to 'doc/src')
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index f2f379870f..09427bbed2 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -962,12 +962,11 @@ SELECT name, child FROM nodes, LATERAL listchildren(name) AS child;
- Currently, functions returning sets can also be called in the select list
+ Functions returning sets can also be called in the select list
of a query. For each row that the query
- generates by itself, the function returning set is invoked, and an output
- row is generated for each element of the function's result set. Note,
- however, that this capability is deprecated and might be removed in future
- releases. The previous example could also be done with queries like
+ generates by itself, the set-returning function is invoked, and an output
+ row is generated for each element of the function's result set.
+ The previous example could also be done with queries like
these:
@@ -998,6 +997,33 @@ SELECT name, listchildren(name) FROM nodes;
the LATERAL> syntax.
+
+ If there is more than one set-returning function in the same select
+ list, the behavior is similar to what you get from putting the functions
+ into a single LATERAL ROWS FROM( ... )> FROM>-clause
+ item. For each row from the underlying query, there is an output row
+ using the first result from each function, then an output row using the
+ second result, and so on. If some of the set-returning functions
+ produce fewer outputs than others, null values are substituted for the
+ missing data, so that the total number of rows emitted for one
+ underlying row is the same as for the set-returning function that
+ produced the most outputs.
+
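An illustrative sketch of the null-padding rule just described, using
generate_series (any set-returning functions of unequal cardinality behave
the same way):

    SELECT generate_series(1,3) AS a, generate_series(1,2) AS b;
    --  a | b
    -- ---+---
    --  1 | 1
    --  2 | 2
    --  3 |        (b is null once its SRF is exhausted)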
+
+
+ Set-returning functions can be nested in a select list, although that is
+ not allowed in FROM>-clause items. In such cases, each level
+ of nesting is treated separately, as though it were
+ another LATERAL ROWS FROM( ... )> item. For example, in
+
+SELECT srf1(srf2(x), srf3(y)), srf4(srf5(z)) FROM ...
+
+ the set-returning functions srf2>, srf3>,
+ and srf5> would be run in lockstep for each row of the
+ underlying query, and then srf1> and srf4> would
+ be applied in lockstep to each row produced by the lower functions.
+
+
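A concrete sketch of this level-by-level evaluation, substituting
generate_series for the placeholder srf names above:

    SELECT generate_series(1, generate_series(1,3));
    -- the inner level yields 1, 2, 3; the outer SRF then expands each value:
    -- rows: 1, 1, 2, 1, 2, 3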
If a function's last command is INSERT>, UPDATE>,
@@ -1012,14 +1038,14 @@ SELECT name, listchildren(name) FROM nodes;
- The key problem with using set-returning functions in the select list,
- rather than the FROM> clause, is that putting more than one
- set-returning function in the same select list does not behave very
- sensibly. (What you actually get if you do so is a number of output
- rows equal to the least common multiple of the numbers of rows produced
- by each set-returning function.) The LATERAL> syntax
- produces less surprising results when calling multiple set-returning
- functions, and should usually be used instead.
+ Before PostgreSQL> 10, putting more than one
+ set-returning function in the same select list did not behave very
+ sensibly unless they always produced equal numbers of rows. Otherwise,
+ what you got was a number of output rows equal to the least common
+ multiple of the numbers of rows produced by the set-returning
+ functions. Furthermore, nested set-returning functions did not work at
+ all. Use of the LATERAL> syntax is recommended when writing
+ queries that need to work in older PostgreSQL> versions.
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ee7046c47b..f9fb27658f 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -852,6 +852,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_Result:
pname = sname = "Result";
break;
+ case T_ProjectSet:
+ pname = sname = "ProjectSet";
+ break;
case T_ModifyTable:
sname = "ModifyTable";
switch (((ModifyTable *) plan)->operation)
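Given the node name registered above, a plan evaluating targetlist SRFs
should now display the new node; a sketch of the expected EXPLAIN shape (not
output captured from the patch):

    EXPLAIN (COSTS OFF) SELECT generate_series(1,3);
    --  ProjectSet
    --    ->  Result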
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index 51edd4c5e7..c51415830a 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -17,11 +17,12 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \
execScan.o execTuples.o \
execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \
nodeBitmapAnd.o nodeBitmapOr.o \
- nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeCustom.o nodeGather.o \
+ nodeBitmapHeapscan.o nodeBitmapIndexscan.o \
+ nodeCustom.o nodeFunctionscan.o nodeGather.o \
nodeHash.o nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \
nodeLimit.o nodeLockRows.o \
nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \
- nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \
+ nodeNestloop.o nodeProjectSet.o nodeRecursiveunion.o nodeResult.o \
nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \
nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 3ea36979b3..b52cfaa41f 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -39,6 +39,7 @@
#include "executor/nodeMergejoin.h"
#include "executor/nodeModifyTable.h"
#include "executor/nodeNestloop.h"
+#include "executor/nodeProjectSet.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
#include "executor/nodeSamplescan.h"
@@ -130,6 +131,10 @@ ExecReScan(PlanState *node)
ExecReScanResult((ResultState *) node);
break;
+ case T_ProjectSetState:
+ ExecReScanProjectSet((ProjectSetState *) node);
+ break;
+
case T_ModifyTableState:
ExecReScanModifyTable((ModifyTableState *) node);
break;
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index b8edd36470..0dd95c6d17 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -88,6 +88,7 @@
#include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h"
#include "executor/nodeFunctionscan.h"
+#include "executor/nodeGather.h"
#include "executor/nodeGroup.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
@@ -100,7 +101,7 @@
#include "executor/nodeMergejoin.h"
#include "executor/nodeModifyTable.h"
#include "executor/nodeNestloop.h"
-#include "executor/nodeGather.h"
+#include "executor/nodeProjectSet.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
#include "executor/nodeSamplescan.h"
@@ -155,6 +156,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_ProjectSet:
+ result = (PlanState *) ExecInitProjectSet((ProjectSet *) node,
+ estate, eflags);
+ break;
+
case T_ModifyTable:
result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,
estate, eflags);
@@ -392,6 +398,10 @@ ExecProcNode(PlanState *node)
result = ExecResult((ResultState *) node);
break;
+ case T_ProjectSetState:
+ result = ExecProjectSet((ProjectSetState *) node);
+ break;
+
case T_ModifyTableState:
result = ExecModifyTable((ModifyTableState *) node);
break;
@@ -634,6 +644,10 @@ ExecEndNode(PlanState *node)
ExecEndResult((ResultState *) node);
break;
+ case T_ProjectSetState:
+ ExecEndProjectSet((ProjectSetState *) node);
+ break;
+
case T_ModifyTableState:
ExecEndModifyTable((ModifyTableState *) node);
break;
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index bf007b7efd..eed7e95c75 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -29,9 +29,9 @@
* instead of doing needless copying. -cim 5/31/91
*
* During expression evaluation, we check_stack_depth only in
- * ExecMakeFunctionResult (and substitute routines) rather than at every
- * single node. This is a compromise that trades off precision of the
- * stack limit setting to gain speed.
+ * ExecMakeFunctionResultSet/ExecMakeFunctionResultNoSets rather than at
+ * every single node. This is a compromise that trades off precision of
+ * the stack limit setting to gain speed.
*/
#include "postgres.h"
@@ -92,7 +92,7 @@ static Datum ExecEvalParamExec(ExprState *exprstate, ExprContext *econtext,
static Datum ExecEvalParamExtern(ExprState *exprstate, ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
static void init_fcache(Oid foid, Oid input_collation, FuncExprState *fcache,
- MemoryContext fcacheCxt, bool needDescForSets);
+ MemoryContext fcacheCxt, bool allowSRF, bool needDescForSRF);
static void ShutdownFuncExpr(Datum arg);
static TupleDesc get_cached_rowtype(Oid type_id, int32 typmod,
TupleDesc *cache_field, ExprContext *econtext);
@@ -104,10 +104,6 @@ static void ExecPrepareTuplestoreResult(FuncExprState *fcache,
Tuplestorestate *resultStore,
TupleDesc resultDesc);
static void tupledesc_match(TupleDesc dst_tupdesc, TupleDesc src_tupdesc);
-static Datum ExecMakeFunctionResult(FuncExprState *fcache,
- ExprContext *econtext,
- bool *isNull,
- ExprDoneCond *isDone);
static Datum ExecMakeFunctionResultNoSets(FuncExprState *fcache,
ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
@@ -1327,7 +1323,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull)
*/
static void
init_fcache(Oid foid, Oid input_collation, FuncExprState *fcache,
- MemoryContext fcacheCxt, bool needDescForSets)
+ MemoryContext fcacheCxt, bool allowSRF, bool needDescForSRF)
{
AclResult aclresult;
@@ -1360,8 +1356,17 @@ init_fcache(Oid foid, Oid input_collation, FuncExprState *fcache,
list_length(fcache->args),
input_collation, NULL, NULL);
+ /* If function returns set, check if that's allowed by caller */
+ if (fcache->func.fn_retset && !allowSRF)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+
+ /* Otherwise, ExecInitExpr should have marked the fcache correctly */
+ Assert(fcache->func.fn_retset == fcache->funcReturnsSet);
+
/* If function returns set, prepare expected tuple descriptor */
- if (fcache->func.fn_retset && needDescForSets)
+ if (fcache->func.fn_retset && needDescForSRF)
{
TypeFuncClass functypclass;
Oid funcrettype;
@@ -1549,7 +1554,7 @@ ExecEvalFuncArgs(FunctionCallInfo fcinfo,
/*
* ExecPrepareTuplestoreResult
*
- * Subroutine for ExecMakeFunctionResult: prepare to extract rows from a
+ * Subroutine for ExecMakeFunctionResultSet: prepare to extract rows from a
* tuplestore function result. We must set up a funcResultSlot (unless
* already done in a previous call cycle) and verify that the function
* returned the expected tuple descriptor.
@@ -1673,19 +1678,17 @@ tupledesc_match(TupleDesc dst_tupdesc, TupleDesc src_tupdesc)
}
/*
- * ExecMakeFunctionResult
+ * ExecMakeFunctionResultSet
*
- * Evaluate the arguments to a function and then the function itself.
- * init_fcache is presumed already run on the FuncExprState.
- *
- * This function handles the most general case, wherein the function or
- * one of its arguments can return a set.
+ * Evaluate the arguments to a set-returning function and then call the
+ * function itself. The argument expressions may not contain set-returning
+ * functions (the planner is supposed to have separated evaluation for those).
*/
-static Datum
-ExecMakeFunctionResult(FuncExprState *fcache,
- ExprContext *econtext,
- bool *isNull,
- ExprDoneCond *isDone)
+Datum
+ExecMakeFunctionResultSet(FuncExprState *fcache,
+ ExprContext *econtext,
+ bool *isNull,
+ ExprDoneCond *isDone)
{
List *arguments;
Datum result;
@@ -1701,6 +1704,31 @@ restart:
/* Guard against stack overflow due to overly complex expressions */
check_stack_depth();
+ /*
+ * Initialize function cache if first time through. The expression node
+ * could be either a FuncExpr or an OpExpr.
+ */
+ if (fcache->func.fn_oid == InvalidOid)
+ {
+ if (IsA(fcache->xprstate.expr, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) fcache->xprstate.expr;
+
+ init_fcache(func->funcid, func->inputcollid, fcache,
+ econtext->ecxt_per_query_memory, true, true);
+ }
+ else if (IsA(fcache->xprstate.expr, OpExpr))
+ {
+ OpExpr *op = (OpExpr *) fcache->xprstate.expr;
+
+ init_fcache(op->opfuncid, op->inputcollid, fcache,
+ econtext->ecxt_per_query_memory, true, true);
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(fcache->xprstate.expr));
+ }
+
/*
* If a previous call of the function returned a set result in the form of
* a tuplestore, continue reading rows from the tuplestore until it's
@@ -1750,19 +1778,11 @@ restart:
if (!fcache->setArgsValid)
{
argDone = ExecEvalFuncArgs(fcinfo, arguments, econtext);
- if (argDone == ExprEndResult)
- {
- /* input is an empty set, so return an empty set. */
- *isNull = true;
- if (isDone)
- *isDone = ExprEndResult;
- else
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("set-valued function called in context that cannot accept a set")));
- return (Datum) 0;
- }
- hasSetArg = (argDone != ExprSingleResult);
+ if (argDone != ExprSingleResult)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+ hasSetArg = false;
}
else
{
@@ -1989,8 +2009,8 @@ restart:
/*
* ExecMakeFunctionResultNoSets
*
- * Simplified version of ExecMakeFunctionResult that can only handle
- * non-set cases. Hand-tuned for speed.
+ * Evaluate a function or operator node with a non-set-returning function.
+ * Assumes init_fcache() already done. Hand-tuned for speed.
*/
static Datum
ExecMakeFunctionResultNoSets(FuncExprState *fcache,
@@ -2120,7 +2140,7 @@ ExecMakeTableFunctionResult(ExprState *funcexpr,
ExprDoneCond argDone;
/*
- * This path is similar to ExecMakeFunctionResult.
+ * This path is similar to ExecMakeFunctionResultSet.
*/
direct_function_call = true;
@@ -2132,7 +2152,7 @@ ExecMakeTableFunctionResult(ExprState *funcexpr,
FuncExpr *func = (FuncExpr *) fcache->xprstate.expr;
init_fcache(func->funcid, func->inputcollid, fcache,
- econtext->ecxt_per_query_memory, false);
+ econtext->ecxt_per_query_memory, true, false);
}
returnsSet = fcache->func.fn_retset;
InitFunctionCallInfoData(fcinfo, &(fcache->func),
@@ -2423,24 +2443,11 @@ ExecEvalFunc(FuncExprState *fcache,
/* Initialize function lookup info */
init_fcache(func->funcid, func->inputcollid, fcache,
- econtext->ecxt_per_query_memory, true);
+ econtext->ecxt_per_query_memory, false, false);
- /*
- * We need to invoke ExecMakeFunctionResult if either the function itself
- * or any of its input expressions can return a set. Otherwise, invoke
- * ExecMakeFunctionResultNoSets. In either case, change the evalfunc
- * pointer to go directly there on subsequent uses.
- */
- if (fcache->func.fn_retset || expression_returns_set((Node *) func->args))
- {
- fcache->xprstate.evalfunc = (ExprStateEvalFunc) ExecMakeFunctionResult;
- return ExecMakeFunctionResult(fcache, econtext, isNull, isDone);
- }
- else
- {
- fcache->xprstate.evalfunc = (ExprStateEvalFunc) ExecMakeFunctionResultNoSets;
- return ExecMakeFunctionResultNoSets(fcache, econtext, isNull, isDone);
- }
+ /* Change the evalfunc pointer to save a few cycles in additional calls */
+ fcache->xprstate.evalfunc = (ExprStateEvalFunc) ExecMakeFunctionResultNoSets;
+ return ExecMakeFunctionResultNoSets(fcache, econtext, isNull, isDone);
}
/* ----------------------------------------------------------------
@@ -2458,24 +2465,11 @@ ExecEvalOper(FuncExprState *fcache,
/* Initialize function lookup info */
init_fcache(op->opfuncid, op->inputcollid, fcache,
- econtext->ecxt_per_query_memory, true);
+ econtext->ecxt_per_query_memory, false, false);
- /*
- * We need to invoke ExecMakeFunctionResult if either the function itself
- * or any of its input expressions can return a set. Otherwise, invoke
- * ExecMakeFunctionResultNoSets. In either case, change the evalfunc
- * pointer to go directly there on subsequent uses.
- */
- if (fcache->func.fn_retset || expression_returns_set((Node *) op->args))
- {
- fcache->xprstate.evalfunc = (ExprStateEvalFunc) ExecMakeFunctionResult;
- return ExecMakeFunctionResult(fcache, econtext, isNull, isDone);
- }
- else
- {
- fcache->xprstate.evalfunc = (ExprStateEvalFunc) ExecMakeFunctionResultNoSets;
- return ExecMakeFunctionResultNoSets(fcache, econtext, isNull, isDone);
- }
+ /* Change the evalfunc pointer to save a few cycles in additional calls */
+ fcache->xprstate.evalfunc = (ExprStateEvalFunc) ExecMakeFunctionResultNoSets;
+ return ExecMakeFunctionResultNoSets(fcache, econtext, isNull, isDone);
}
/* ----------------------------------------------------------------
@@ -2512,8 +2506,7 @@ ExecEvalDistinct(FuncExprState *fcache,
DistinctExpr *op = (DistinctExpr *) fcache->xprstate.expr;
init_fcache(op->opfuncid, op->inputcollid, fcache,
- econtext->ecxt_per_query_memory, true);
- Assert(!fcache->func.fn_retset);
+ econtext->ecxt_per_query_memory, false, false);
}
/*
@@ -2589,8 +2582,7 @@ ExecEvalScalarArrayOp(ScalarArrayOpExprState *sstate,
if (sstate->fxprstate.func.fn_oid == InvalidOid)
{
init_fcache(opexpr->opfuncid, opexpr->inputcollid, &sstate->fxprstate,
- econtext->ecxt_per_query_memory, true);
- Assert(!sstate->fxprstate.func.fn_retset);
+ econtext->ecxt_per_query_memory, false, false);
}
/*
@@ -3857,8 +3849,7 @@ ExecEvalNullIf(FuncExprState *nullIfExpr,
NullIfExpr *op = (NullIfExpr *) nullIfExpr->xprstate.expr;
init_fcache(op->opfuncid, op->inputcollid, nullIfExpr,
- econtext->ecxt_per_query_memory, true);
- Assert(!nullIfExpr->func.fn_retset);
+ econtext->ecxt_per_query_memory, false, false);
}
/*
@@ -4739,6 +4730,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
fstate->args = (List *)
ExecInitExpr((Expr *) funcexpr->args, parent);
fstate->func.fn_oid = InvalidOid; /* not initialized */
+ fstate->funcReturnsSet = funcexpr->funcretset;
state = (ExprState *) fstate;
}
break;
@@ -4751,6 +4743,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
fstate->args = (List *)
ExecInitExpr((Expr *) opexpr->args, parent);
fstate->func.fn_oid = InvalidOid; /* not initialized */
+ fstate->funcReturnsSet = opexpr->opretset;
state = (ExprState *) fstate;
}
break;
@@ -4763,6 +4756,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
fstate->args = (List *)
ExecInitExpr((Expr *) distinctexpr->args, parent);
fstate->func.fn_oid = InvalidOid; /* not initialized */
+ fstate->funcReturnsSet = false; /* not supported */
state = (ExprState *) fstate;
}
break;
@@ -4775,6 +4769,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
fstate->args = (List *)
ExecInitExpr((Expr *) nullifexpr->args, parent);
fstate->func.fn_oid = InvalidOid; /* not initialized */
+ fstate->funcReturnsSet = false; /* not supported */
state = (ExprState *) fstate;
}
break;
@@ -4787,6 +4782,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
sstate->fxprstate.args = (List *)
ExecInitExpr((Expr *) opexpr->args, parent);
sstate->fxprstate.func.fn_oid = InvalidOid; /* not initialized */
+ sstate->fxprstate.funcReturnsSet = false; /* not supported */
sstate->element_type = InvalidOid; /* ditto */
state = (ExprState *) sstate;
}
diff --git a/src/backend/executor/nodeProjectSet.c b/src/backend/executor/nodeProjectSet.c
new file mode 100644
index 0000000000..391e97ea6f
--- /dev/null
+++ b/src/backend/executor/nodeProjectSet.c
@@ -0,0 +1,300 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeProjectSet.c
+ * support for evaluating targetlists containing set-returning functions
+ *
+ * DESCRIPTION
+ *
+ * ProjectSet nodes are inserted by the planner to evaluate set-returning
+ * functions in the targetlist. It's guaranteed that all set-returning
+ * functions are directly at the top level of the targetlist, i.e. they
+ * can't be inside more-complex expressions. If that'd otherwise be
+ * the case, the planner adds additional ProjectSet nodes.
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeProjectSet.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "executor/executor.h"
+#include "executor/nodeProjectSet.h"
+#include "utils/memutils.h"
+
+
+static TupleTableSlot *ExecProjectSRF(ProjectSetState *node, bool continuing);
+
+
+/* ----------------------------------------------------------------
+ * ExecProjectSet(node)
+ *
+ * Return tuples after evaluating the targetlist (which contains set
+ * returning functions).
+ * ----------------------------------------------------------------
+ */
+TupleTableSlot *
+ExecProjectSet(ProjectSetState *node)
+{
+ TupleTableSlot *outerTupleSlot;
+ TupleTableSlot *resultSlot;
+ PlanState *outerPlan;
+ ExprContext *econtext;
+
+ econtext = node->ps.ps_ExprContext;
+
+ /*
+ * Check to see if we're still projecting out tuples from a previous scan
+ * tuple (because there is a function-returning-set in the projection
+ * expressions). If so, try to project another one.
+ */
+ if (node->pending_srf_tuples)
+ {
+ resultSlot = ExecProjectSRF(node, true);
+
+ if (resultSlot != NULL)
+ return resultSlot;
+ }
+
+ /*
+ * Reset per-tuple memory context to free any expression evaluation
+ * storage allocated in the previous tuple cycle. Note this can't happen
+ * until we're done projecting out tuples from a scan tuple.
+ */
+ ResetExprContext(econtext);
+
+ /*
+ * Get another input tuple and project SRFs from it.
+ */
+ for (;;)
+ {
+ /*
+ * Retrieve tuples from the outer plan until there are no more.
+ */
+ outerPlan = outerPlanState(node);
+ outerTupleSlot = ExecProcNode(outerPlan);
+
+ if (TupIsNull(outerTupleSlot))
+ return NULL;
+
+ /*
+ * Prepare to compute projection expressions, which will expect to
+ * access the input tuples as varno OUTER.
+ */
+ econtext->ecxt_outertuple = outerTupleSlot;
+
+ /* Evaluate the expressions */
+ resultSlot = ExecProjectSRF(node, false);
+
+ /*
+ * Return the tuple unless the projection produced no rows (due to an
+ * empty set), in which case we must loop back to see if there are
+ * more outerPlan tuples.
+ */
+ if (resultSlot)
+ return resultSlot;
+ }
+
+ return NULL;
+}
+
+/* ----------------------------------------------------------------
+ * ExecProjectSRF
+ *
+ * Project a targetlist containing one or more set-returning functions.
+ *
+ * 'continuing' indicates whether to continue projecting rows for the
+ * same input tuple; or whether a new input tuple is being projected.
+ *
+ * Returns NULL if no output tuple has been produced.
+ *
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+ExecProjectSRF(ProjectSetState *node, bool continuing)
+{
+ TupleTableSlot *resultSlot = node->ps.ps_ResultTupleSlot;
+ ExprContext *econtext = node->ps.ps_ExprContext;
+ bool hassrf PG_USED_FOR_ASSERTS_ONLY = false;
+ bool hasresult;
+ int argno;
+ ListCell *lc;
+
+ ExecClearTuple(resultSlot);
+
+ /*
+ * Assume no further tuples are produced unless an ExprMultipleResult is
+ * encountered from a set returning function.
+ */
+ node->pending_srf_tuples = false;
+
+ hasresult = false;
+ argno = 0;
+ foreach(lc, node->ps.targetlist)
+ {
+ GenericExprState *gstate = (GenericExprState *) lfirst(lc);
+ ExprDoneCond *isdone = &node->elemdone[argno];
+ Datum *result = &resultSlot->tts_values[argno];
+ bool *isnull = &resultSlot->tts_isnull[argno];
+
+ if (continuing && *isdone == ExprEndResult)
+ {
+ /*
+ * If we're continuing to project output rows from a source tuple,
+ * return NULLs once the SRF has been exhausted.
+ */
+ *result = (Datum) 0;
+ *isnull = true;
+ hassrf = true;
+ }
+ else if (IsA(gstate->arg, FuncExprState) &&
+ ((FuncExprState *) gstate->arg)->funcReturnsSet)
+ {
+ /*
+ * Evaluate SRF - possibly continuing previously started output.
+ */
+ *result = ExecMakeFunctionResultSet((FuncExprState *) gstate->arg,
+ econtext, isnull, isdone);
+
+ if (*isdone != ExprEndResult)
+ hasresult = true;
+ if (*isdone == ExprMultipleResult)
+ node->pending_srf_tuples = true;
+ hassrf = true;
+ }
+ else
+ {
+ /* Non-SRF tlist expression, just evaluate normally. */
+ *result = ExecEvalExpr(gstate->arg, econtext, isnull, NULL);
+ *isdone = ExprSingleResult;
+ }
+
+ argno++;
+ }
+
+ /* ProjectSet should not be used if there's no SRFs */
+ Assert(hassrf);
+
+ /*
+ * If all the SRFs returned EndResult, we consider that as no row being
+ * produced.
+ */
+ if (hasresult)
+ {
+ ExecStoreVirtualTuple(resultSlot);
+ return resultSlot;
+ }
+
+ return NULL;
+}
+
+/* ----------------------------------------------------------------
+ * ExecInitProjectSet
+ *
+ * Creates the run-time state information for the ProjectSet node
+ * produced by the planner and initializes outer relations
+ * (child nodes).
+ * ----------------------------------------------------------------
+ */
+ProjectSetState *
+ExecInitProjectSet(ProjectSet *node, EState *estate, int eflags)
+{
+ ProjectSetState *state;
+
+ /* check for unsupported flags */
+ Assert(!(eflags & (EXEC_FLAG_MARK | EXEC_FLAG_BACKWARD)));
+
+ /*
+ * create state structure
+ */
+ state = makeNode(ProjectSetState);
+ state->ps.plan = (Plan *) node;
+ state->ps.state = estate;
+
+ state->pending_srf_tuples = false;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &state->ps);
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &state->ps);
+
+ /*
+ * initialize child expressions
+ */
+ state->ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->plan.targetlist,
+ (PlanState *) state);
+ Assert(node->plan.qual == NIL);
+
+ /*
+ * initialize child nodes
+ */
+ outerPlanState(state) = ExecInitNode(outerPlan(node), estate, eflags);
+
+ /*
+ * we don't use inner plan
+ */
+ Assert(innerPlan(node) == NULL);
+
+ /*
+ * initialize tuple type and projection info
+ */
+ ExecAssignResultTypeFromTL(&state->ps);
+
+ /* Create workspace for per-SRF is-done state */
+ state->nelems = list_length(node->plan.targetlist);
+ state->elemdone = (ExprDoneCond *)
+ palloc(sizeof(ExprDoneCond) * state->nelems);
+
+ return state;
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndProjectSet
+ *
+ * frees up storage allocated through C routines
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndProjectSet(ProjectSetState *node)
+{
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->ps);
+
+ /*
+ * clean out the tuple table
+ */
+ ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
+ /*
+ * shut down subplans
+ */
+ ExecEndNode(outerPlanState(node));
+}
+
+void
+ExecReScanProjectSet(ProjectSetState *node)
+{
+ /* Forget any incompletely-evaluated SRFs */
+ node->pending_srf_tuples = false;
+
+ /*
+ * If chgParam of subnode is not null then plan will be re-scanned by
+ * first ExecProcNode.
+ */
+ if (node->ps.lefttree->chgParam == NULL)
+ ExecReScan(node->ps.lefttree);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 9f6a7e6154..f871e9d4bb 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -165,6 +165,22 @@ _copyResult(const Result *from)
return newnode;
}
+/*
+ * _copyProjectSet
+ */
+static ProjectSet *
+_copyProjectSet(const ProjectSet *from)
+{
+ ProjectSet *newnode = makeNode(ProjectSet);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyPlanFields((const Plan *) from, (Plan *) newnode);
+
+ return newnode;
+}
+
/*
* _copyModifyTable
*/
@@ -4415,6 +4431,9 @@ copyObject(const void *from)
case T_Result:
retval = _copyResult(from);
break;
+ case T_ProjectSet:
+ retval = _copyProjectSet(from);
+ break;
case T_ModifyTable:
retval = _copyModifyTable(from);
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index c2ba38ecd6..1560ac3989 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -326,6 +326,14 @@ _outResult(StringInfo str, const Result *node)
WRITE_NODE_FIELD(resconstantqual);
}
+static void
+_outProjectSet(StringInfo str, const ProjectSet *node)
+{
+ WRITE_NODE_TYPE("PROJECTSET");
+
+ _outPlanInfo(str, (const Plan *) node);
+}
+
static void
_outModifyTable(StringInfo str, const ModifyTable *node)
{
@@ -1807,6 +1815,16 @@ _outProjectionPath(StringInfo str, const ProjectionPath *node)
WRITE_BOOL_FIELD(dummypp);
}
+static void
+_outProjectSetPath(StringInfo str, const ProjectSetPath *node)
+{
+ WRITE_NODE_TYPE("PROJECTSETPATH");
+
+ _outPathInfo(str, (const Path *) node);
+
+ WRITE_NODE_FIELD(subpath);
+}
+
static void
_outSortPath(StringInfo str, const SortPath *node)
{
@@ -3367,6 +3385,9 @@ outNode(StringInfo str, const void *obj)
case T_Result:
_outResult(str, obj);
break;
+ case T_ProjectSet:
+ _outProjectSet(str, obj);
+ break;
case T_ModifyTable:
_outModifyTable(str, obj);
break;
@@ -3679,6 +3700,9 @@ outNode(StringInfo str, const void *obj)
case T_ProjectionPath:
_outProjectionPath(str, obj);
break;
+ case T_ProjectSetPath:
+ _outProjectSetPath(str, obj);
+ break;
case T_SortPath:
_outSortPath(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index e02dd94f05..dcfa6ee28d 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1483,6 +1483,19 @@ _readResult(void)
READ_DONE();
}
+/*
+ * _readProjectSet
+ */
+static ProjectSet *
+_readProjectSet(void)
+{
+ READ_LOCALS_NO_FIELDS(ProjectSet);
+
+ ReadCommonPlan(&local_node->plan);
+
+ READ_DONE();
+}
+
/*
* _readModifyTable
*/
@@ -2450,6 +2463,8 @@ parseNodeString(void)
return_value = _readPlan();
else if (MATCH("RESULT", 6))
return_value = _readResult();
+ else if (MATCH("PROJECTSET", 10))
+ return_value = _readProjectSet();
else if (MATCH("MODIFYTABLE", 11))
return_value = _readModifyTable();
else if (MATCH("APPEND", 6))
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 1998739702..7ae2b74b2c 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -375,6 +375,7 @@ RelOptInfo - a relation or joined relations
UniquePath - remove duplicate rows (either by hashing or sorting)
GatherPath - collect the results of parallel workers
ProjectionPath - a Result plan node with child (used for projection)
+ ProjectSetPath - a ProjectSet plan node applied to some sub-path
SortPath - a Sort plan node applied to some sub-path
GroupPath - a Group plan node applied to some sub-path
UpperUniquePath - a Unique plan node applied to some sub-path
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 96ca7d3bb0..7c017fe1e4 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -3051,6 +3051,10 @@ print_path(PlannerInfo *root, Path *path, int indent)
ptype = "Projection";
subpath = ((ProjectionPath *) path)->subpath;
break;
+ case T_ProjectSetPath:
+ ptype = "ProjectSet";
+ subpath = ((ProjectSetPath *) path)->subpath;
+ break;
case T_SortPath:
ptype = "Sort";
subpath = ((SortPath *) path)->subpath;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index c4ada214ed..fae1f67b9c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -81,6 +81,7 @@ static Plan *create_join_plan(PlannerInfo *root, JoinPath *best_path);
static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
static Result *create_result_plan(PlannerInfo *root, ResultPath *best_path);
+static ProjectSet *create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path);
static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path,
int flags);
static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path,
@@ -264,6 +265,7 @@ static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
long numGroups);
static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
+static ProjectSet *make_project_set(List *tlist, Plan *subplan);
static ModifyTable *make_modifytable(PlannerInfo *root,
CmdType operation, bool canSetTag,
Index nominalRelation,
@@ -392,6 +394,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
(ResultPath *) best_path);
}
break;
+ case T_ProjectSet:
+ plan = (Plan *) create_project_set_plan(root,
+ (ProjectSetPath *) best_path);
+ break;
case T_Material:
plan = (Plan *) create_material_plan(root,
(MaterialPath *) best_path,
@@ -1141,6 +1147,31 @@ create_result_plan(PlannerInfo *root, ResultPath *best_path)
return plan;
}
+/*
+ * create_project_set_plan
+ * Create a ProjectSet plan for 'best_path'.
+ *
+ * Returns a Plan node.
+ */
+static ProjectSet *
+create_project_set_plan(PlannerInfo *root, ProjectSetPath *best_path)
+{
+ ProjectSet *plan;
+ Plan *subplan;
+ List *tlist;
+
+ /* Since we intend to project, we don't need to constrain child tlist */
+ subplan = create_plan_recurse(root, best_path->subpath, 0);
+
+ tlist = build_path_tlist(root, &best_path->path);
+
+ plan = make_project_set(tlist, subplan);
+
+ copy_generic_path_info(&plan->plan, (Path *) best_path);
+
+ return plan;
+}
+
/*
* create_material_plan
* Create a Material plan for 'best_path' and (recursively) plans
@@ -6063,6 +6094,25 @@ make_result(List *tlist,
return node;
}
+/*
+ * make_project_set
+ * Build a ProjectSet plan node
+ */
+static ProjectSet *
+make_project_set(List *tlist,
+ Plan *subplan)
+{
+ ProjectSet *node = makeNode(ProjectSet);
+ Plan *plan = &node->plan;
+
+ plan->targetlist = tlist;
+ plan->qual = NIL;
+ plan->lefttree = subplan;
+ plan->righttree = NULL;
+
+ return node;
+}
+
/*
* make_modifytable
* Build a ModifyTable plan node
@@ -6229,6 +6279,15 @@ is_projection_capable_path(Path *path)
* projection to its dummy path.
*/
return IS_DUMMY_PATH(path);
+ case T_ProjectSet:
+
+ /*
+ * Although ProjectSet certainly projects, say "no" because we
+ * don't want the planner to randomly replace its tlist with
+ * something else; the SRFs have to stay at top level. This might
+ * get relaxed later.
+ */
+ return false;
default:
break;
}
@@ -6257,6 +6316,15 @@ is_projection_capable_plan(Plan *plan)
case T_MergeAppend:
case T_RecursiveUnion:
return false;
+ case T_ProjectSet:
+
+ /*
+ * Although ProjectSet certainly projects, say "no" because we
+ * don't want the planner to randomly replace its tlist with
+ * something else; the SRFs have to stay at top level. This might
+ * get relaxed later.
+ */
+ return false;
default:
break;
}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 25f2c5a614..4b5902fc3e 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -153,6 +153,8 @@ static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
static PathTarget *make_sort_input_target(PlannerInfo *root,
PathTarget *final_target,
bool *have_postponed_srfs);
+static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
+ List *targets, List *targets_contain_srfs);
/*****************************************************************************
@@ -1400,8 +1402,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
int64 count_est = 0;
double limit_tuples = -1.0;
bool have_postponed_srfs = false;
- double tlist_rows;
PathTarget *final_target;
+ List *final_targets;
+ List *final_targets_contain_srfs;
RelOptInfo *current_rel;
RelOptInfo *final_rel;
ListCell *lc;
@@ -1464,6 +1467,10 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
/* Also extract the PathTarget form of the setop result tlist */
final_target = current_rel->cheapest_total_path->pathtarget;
+ /* The setop result tlist couldn't contain any SRFs */
+ Assert(!parse->hasTargetSRFs);
+ final_targets = final_targets_contain_srfs = NIL;
+
/*
* Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
* checked already, but let's make sure).
@@ -1489,8 +1496,14 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
{
/* No set operations, do regular planning */
PathTarget *sort_input_target;
+ List *sort_input_targets;
+ List *sort_input_targets_contain_srfs;
PathTarget *grouping_target;
+ List *grouping_targets;
+ List *grouping_targets_contain_srfs;
PathTarget *scanjoin_target;
+ List *scanjoin_targets;
+ List *scanjoin_targets_contain_srfs;
bool have_grouping;
AggClauseCosts agg_costs;
WindowFuncLists *wflists = NULL;
@@ -1735,8 +1748,50 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
scanjoin_target = grouping_target;
/*
- * Forcibly apply scan/join target to all the Paths for the scan/join
- * rel.
+ * If there are any SRFs in the targetlist, we must separate each of
+ * these PathTargets into SRF-computing and SRF-free targets. Replace
+ * each of the named targets with a SRF-free version, and remember the
+ * list of additional projection steps we need to add afterwards.
+ */
+ if (parse->hasTargetSRFs)
+ {
+ /* final_target doesn't recompute any SRFs in sort_input_target */
+ split_pathtarget_at_srfs(root, final_target, sort_input_target,
+ &final_targets,
+ &final_targets_contain_srfs);
+ final_target = (PathTarget *) linitial(final_targets);
+ Assert(!linitial_int(final_targets_contain_srfs));
+ /* likewise for sort_input_target vs. grouping_target */
+ split_pathtarget_at_srfs(root, sort_input_target, grouping_target,
+ &sort_input_targets,
+ &sort_input_targets_contain_srfs);
+ sort_input_target = (PathTarget *) linitial(sort_input_targets);
+ Assert(!linitial_int(sort_input_targets_contain_srfs));
+ /* likewise for grouping_target vs. scanjoin_target */
+ split_pathtarget_at_srfs(root, grouping_target, scanjoin_target,
+ &grouping_targets,
+ &grouping_targets_contain_srfs);
+ grouping_target = (PathTarget *) linitial(grouping_targets);
+ Assert(!linitial_int(grouping_targets_contain_srfs));
+ /* scanjoin_target will not have any SRFs precomputed for it */
+ split_pathtarget_at_srfs(root, scanjoin_target, NULL,
+ &scanjoin_targets,
+ &scanjoin_targets_contain_srfs);
+ scanjoin_target = (PathTarget *) linitial(scanjoin_targets);
+ Assert(!linitial_int(scanjoin_targets_contain_srfs));
+ }
+ else
+ {
+ /* initialize lists, just to keep compiler quiet */
+ final_targets = final_targets_contain_srfs = NIL;
+ sort_input_targets = sort_input_targets_contain_srfs = NIL;
+ grouping_targets = grouping_targets_contain_srfs = NIL;
+ scanjoin_targets = scanjoin_targets_contain_srfs = NIL;
+ }
+
+ /*
+ * Forcibly apply SRF-free scan/join target to all the Paths for the
+ * scan/join rel.
*
* In principle we should re-run set_cheapest() here to identify the
* cheapest path, but it seems unlikely that adding the same tlist
@@ -1807,6 +1862,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
current_rel->partial_pathlist = NIL;
}
+ /* Now fix things up if scan/join target contains SRFs */
+ if (parse->hasTargetSRFs)
+ adjust_paths_for_srfs(root, current_rel,
+ scanjoin_targets,
+ scanjoin_targets_contain_srfs);
+
/*
* Save the various upper-rel PathTargets we just computed into
* root->upper_targets[]. The core code doesn't use this, but it
@@ -1831,6 +1892,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
&agg_costs,
rollup_lists,
rollup_groupclauses);
+ /* Fix things up if grouping_target contains SRFs */
+ if (parse->hasTargetSRFs)
+ adjust_paths_for_srfs(root, current_rel,
+ grouping_targets,
+ grouping_targets_contain_srfs);
}
/*
@@ -1846,6 +1912,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
tlist,
wflists,
activeWindows);
+ /* Fix things up if sort_input_target contains SRFs */
+ if (parse->hasTargetSRFs)
+ adjust_paths_for_srfs(root, current_rel,
+ sort_input_targets,
+ sort_input_targets_contain_srfs);
}
/*
@@ -1874,40 +1945,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
final_target,
have_postponed_srfs ? -1.0 :
limit_tuples);
- }
-
- /*
- * If there are set-returning functions in the tlist, scale up the output
- * rowcounts of all surviving Paths to account for that. Note that if any
- * SRFs appear in sorting or grouping columns, we'll have underestimated
- * the numbers of rows passing through earlier steps; but that's such a
- * weird usage that it doesn't seem worth greatly complicating matters to
- * account for it.
- */
- if (parse->hasTargetSRFs)
- tlist_rows = tlist_returns_set_rows(tlist);
- else
- tlist_rows = 1;
-
- if (tlist_rows > 1)
- {
- foreach(lc, current_rel->pathlist)
- {
- Path *path = (Path *) lfirst(lc);
-
- /*
- * We assume that execution costs of the tlist as such were
- * already accounted for. However, it still seems appropriate to
- * charge something more for the executor's general costs of
- * processing the added tuples. The cost is probably less than
- * cpu_tuple_cost, though, so we arbitrarily use half of that.
- */
- path->total_cost += path->rows * (tlist_rows - 1) *
- cpu_tuple_cost / 2;
-
- path->rows *= tlist_rows;
- }
- /* No need to run set_cheapest; we're keeping all paths anyway. */
+ /* Fix things up if final_target contains SRFs */
+ if (parse->hasTargetSRFs)
+ adjust_paths_for_srfs(root, current_rel,
+ final_targets,
+ final_targets_contain_srfs);
}
/*
@@ -5101,6 +5143,109 @@ get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
return best_path;
}
+/*
+ * adjust_paths_for_srfs
+ * Fix up the Paths of the given upperrel to handle targetlist SRFs (tSRFs) properly.
+ *
+ * The executor can only handle set-returning functions that appear at the
+ * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
+ * that are not at top level, we need to split up the evaluation into multiple
+ * plan levels in which each level satisfies this constraint. This function
+ * modifies each Path of an upperrel that (might) compute any SRFs in its
+ * output tlist to insert appropriate projection steps.
+ *
+ * The given targets and targets_contain_srfs lists are from
+ * split_pathtarget_at_srfs(). We assume the existing Paths emit the first
+ * target in targets.
+ */
+static void
+adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel,
+ List *targets, List *targets_contain_srfs)
+{
+ ListCell *lc;
+
+ Assert(list_length(targets) == list_length(targets_contain_srfs));
+ Assert(!linitial_int(targets_contain_srfs));
+
+ /* If no SRFs appear at this plan level, nothing to do */
+ if (list_length(targets) == 1)
+ return;
+
+ /*
+ * Stack SRF-evaluation nodes atop each path for the rel.
+ *
+ * In principle we should re-run set_cheapest() here to identify the
+ * cheapest path, but it seems unlikely that adding the same tlist eval
+ * costs to all the paths would change that, so we don't bother. Instead,
+ * just assume that the cheapest-startup and cheapest-total paths remain
+ * so. (There should be no parameterized paths anymore, so we needn't
+ * worry about updating cheapest_parameterized_paths.)
+ */
+ foreach(lc, rel->pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ Path *newpath = subpath;
+ ListCell *lc1,
+ *lc2;
+
+ Assert(subpath->param_info == NULL);
+ forboth(lc1, targets, lc2, targets_contain_srfs)
+ {
+ PathTarget *thistarget = (PathTarget *) lfirst(lc1);
+ bool contains_srfs = (bool) lfirst_int(lc2);
+
+ /* If this level doesn't contain SRFs, do regular projection */
+ if (contains_srfs)
+ newpath = (Path *) create_set_projection_path(root,
+ rel,
+ newpath,
+ thistarget);
+ else
+ newpath = (Path *) apply_projection_to_path(root,
+ rel,
+ newpath,
+ thistarget);
+ }
+ lfirst(lc) = newpath;
+ if (subpath == rel->cheapest_startup_path)
+ rel->cheapest_startup_path = newpath;
+ if (subpath == rel->cheapest_total_path)
+ rel->cheapest_total_path = newpath;
+ }
+
+ /* Likewise for partial paths, if any */
+ foreach(lc, rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ Path *newpath = subpath;
+ ListCell *lc1,
+ *lc2;
+
+ Assert(subpath->param_info == NULL);
+ forboth(lc1, targets, lc2, targets_contain_srfs)
+ {
+ PathTarget *thistarget = (PathTarget *) lfirst(lc1);
+ bool contains_srfs = (bool) lfirst_int(lc2);
+
+ /* If this level doesn't contain SRFs, do regular projection */
+ if (contains_srfs)
+ newpath = (Path *) create_set_projection_path(root,
+ rel,
+ newpath,
+ thistarget);
+ else
+ {
+ /* avoid apply_projection_to_path, in case of multiple refs */
+ newpath = (Path *) create_projection_path(root,
+ rel,
+ newpath,
+ thistarget);
+ }
+ }
+ lfirst(lc) = newpath;
+ }
+}
+
/*
* expression_planner
* Perform planner's transformations on a standalone expression.
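To make adjust_paths_for_srfs concrete, a hedged example (plan shape sketched from the behavior this commit implements; an actual build may print additional detail): nesting one SRF inside another yields one ProjectSet level per nesting depth, because each level's tlist may contain SRFs only at top level:

EXPLAIN (COSTS OFF) SELECT generate_series(1, generate_series(1, 3));
--  ProjectSet
--    ->  ProjectSet
--          ->  Result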
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 413a0d9da2..be267b9da7 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -733,6 +733,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
fix_scan_expr(root, splan->resconstantqual, rtoffset);
}
break;
+ case T_ProjectSet:
+ set_upper_references(root, plan, rtoffset);
+ break;
case T_ModifyTable:
{
ModifyTable *splan = (ModifyTable *) plan;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index aad0b684ed..9fc748973e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2680,6 +2680,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
&context);
break;
+ case T_ProjectSet:
case T_Hash:
case T_Material:
case T_Sort:
@@ -2687,6 +2688,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
case T_Gather:
case T_SetOp:
case T_Group:
+ /* no node-type-specific fields need fixing */
break;
default:
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 9e122e383d..85ffa3afc7 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -99,7 +99,6 @@ static bool contain_agg_clause_walker(Node *node, void *context);
static bool get_agg_clause_costs_walker(Node *node,
get_agg_clause_costs_context *context);
static bool find_window_functions_walker(Node *node, WindowFuncLists *lists);
-static bool expression_returns_set_rows_walker(Node *node, double *count);
static bool contain_subplans_walker(Node *node, void *context);
static bool contain_mutable_functions_walker(Node *node, void *context);
static bool contain_volatile_functions_walker(Node *node, void *context);
@@ -790,114 +789,37 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists)
/*
* expression_returns_set_rows
* Estimate the number of rows returned by a set-returning expression.
- * The result is 1 if there are no set-returning functions.
+ * The result is 1 if it's not a set-returning expression.
*
- * We use the product of the rowcount estimates of all the functions in
- * the given tree (this corresponds to the behavior of ExecMakeFunctionResult
- * for nested set-returning functions).
+ * We should only examine the top-level function or operator; it used to be
+ * appropriate to recurse, but not anymore. (Even if there are more SRFs in
+ * the function's inputs, their multipliers are accounted for separately.)
*
* Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c.
*/
double
expression_returns_set_rows(Node *clause)
{
- double result = 1;
-
- (void) expression_returns_set_rows_walker(clause, &result);
- return clamp_row_est(result);
-}
-
-static bool
-expression_returns_set_rows_walker(Node *node, double *count)
-{
- if (node == NULL)
- return false;
- if (IsA(node, FuncExpr))
+ if (clause == NULL)
+ return 1.0;
+ if (IsA(clause, FuncExpr))
{
- FuncExpr *expr = (FuncExpr *) node;
+ FuncExpr *expr = (FuncExpr *) clause;
if (expr->funcretset)
- *count *= get_func_rows(expr->funcid);
+ return clamp_row_est(get_func_rows(expr->funcid));
}
- if (IsA(node, OpExpr))
+ if (IsA(clause, OpExpr))
{
- OpExpr *expr = (OpExpr *) node;
+ OpExpr *expr = (OpExpr *) clause;
if (expr->opretset)
{
set_opfuncid(expr);
- *count *= get_func_rows(expr->opfuncid);
+ return clamp_row_est(get_func_rows(expr->opfuncid));
}
}
-
- /* Avoid recursion for some cases that can't return a set */
- if (IsA(node, Aggref))
- return false;
- if (IsA(node, WindowFunc))
- return false;
- if (IsA(node, DistinctExpr))
- return false;
- if (IsA(node, NullIfExpr))
- return false;
- if (IsA(node, ScalarArrayOpExpr))
- return false;
- if (IsA(node, BoolExpr))
- return false;
- if (IsA(node, SubLink))
- return false;
- if (IsA(node, SubPlan))
- return false;
- if (IsA(node, AlternativeSubPlan))
- return false;
- if (IsA(node, ArrayExpr))
- return false;
- if (IsA(node, RowExpr))
- return false;
- if (IsA(node, RowCompareExpr))
- return false;
- if (IsA(node, CoalesceExpr))
- return false;
- if (IsA(node, MinMaxExpr))
- return false;
- if (IsA(node, XmlExpr))
- return false;
-
- return expression_tree_walker(node, expression_returns_set_rows_walker,
- (void *) count);
-}
-
-/*
- * tlist_returns_set_rows
- * Estimate the number of rows returned by a set-returning targetlist.
- * The result is 1 if there are no set-returning functions.
- *
- * Here, the result is the largest rowcount estimate of any of the tlist's
- * expressions, not the product as you would get from naively applying
- * expression_returns_set_rows() to the whole tlist. The behavior actually
- * implemented by ExecTargetList produces a number of rows equal to the least
- * common multiple of the expression rowcounts, so that the product would be
- * a worst-case estimate that is typically not realistic. Taking the max as
- * we do here is a best-case estimate that might not be realistic either,
- * but it's probably closer for typical usages. We don't try to compute the
- * actual LCM because we're working with very approximate estimates, so their
- * LCM would be unduly noisy.
- */
-double
-tlist_returns_set_rows(List *tlist)
-{
- double result = 1;
- ListCell *lc;
-
- foreach(lc, tlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(lc);
- double colresult;
-
- colresult = expression_returns_set_rows((Node *) tle->expr);
- if (result < colresult)
- result = colresult;
- }
- return result;
+ return 1.0;
}
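A sketch of what the narrowed estimate means in practice (here rows() stands for the pg_proc.prorows estimate that get_func_rows returns):

-- old estimate for the whole expression: rows(outer) * rows(inner)
-- new estimate: rows(outer) only; rows(inner) is charged by the
-- lower ProjectSet level that actually evaluates the inner SRF
SELECT generate_series(1, generate_series(1, 3));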
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 3b7c56d3c7..f440875ceb 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2319,6 +2319,72 @@ apply_projection_to_path(PlannerInfo *root,
return path;
}
+/*
+ * create_set_projection_path
+ * Creates a pathnode that represents performing a projection that
+ * includes set-returning functions.
+ *
+ * 'rel' is the parent relation associated with the result
+ * 'subpath' is the path representing the source of data
+ * 'target' is the PathTarget to be computed
+ */
+ProjectSetPath *
+create_set_projection_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *subpath,
+ PathTarget *target)
+{
+ ProjectSetPath *pathnode = makeNode(ProjectSetPath);
+ double tlist_rows;
+ ListCell *lc;
+
+ pathnode->path.pathtype = T_ProjectSet;
+ pathnode->path.parent = rel;
+ pathnode->path.pathtarget = target;
+ /* For now, assume we are above any joins, so no parameterization */
+ pathnode->path.param_info = NULL;
+ pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel &&
+ subpath->parallel_safe &&
+ is_parallel_safe(root, (Node *) target->exprs);
+ pathnode->path.parallel_workers = subpath->parallel_workers;
+ /* Projection does not change the sort order XXX? */
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ pathnode->subpath = subpath;
+
+ /*
+ * Estimate number of rows produced by SRFs for each row of input; if
+ * there's more than one in this node, use the maximum.
+ */
+ tlist_rows = 1;
+ foreach(lc, target->exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+ double itemrows;
+
+ itemrows = expression_returns_set_rows(node);
+ if (tlist_rows < itemrows)
+ tlist_rows = itemrows;
+ }
+
+ /*
+ * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost
+ * per input row, and half of cpu_tuple_cost for each added output row.
+ * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit
+ * this estimate later.
+ */
+ pathnode->path.rows = subpath->rows * tlist_rows;
+ pathnode->path.startup_cost = subpath->startup_cost +
+ target->cost.startup;
+ pathnode->path.total_cost = subpath->total_cost +
+ target->cost.startup +
+ (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows +
+ (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2;
+
+ return pathnode;
+}
+
/*
* create_sort_path
* Creates a pathnode that represents performing an explicit sort.
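A worked instance of the cost formula in create_set_projection_path above (assumed inputs: subpath->rows = 100, tlist_rows = 10, target->cost.per_tuple = 0; cpu_tuple_cost at its 0.01 default):

-- rows       = 100 * 10 = 1000
-- total_cost = subpath->total_cost + target->cost.startup
--            + (0.01 + 0) * 100         -- charge per input row      = 1.0
--            + (1000 - 100) * 0.01 / 2  -- half-charge per added row = 4.5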
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 45205a830f..cca5db88e2 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -16,9 +16,20 @@
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
+#include "optimizer/cost.h"
#include "optimizer/tlist.h"
+typedef struct
+{
+ List *nextlevel_tlist;
+ bool nextlevel_contains_srfs;
+} split_pathtarget_context;
+
+static bool split_pathtarget_walker(Node *node,
+ split_pathtarget_context *context);
+
+
/*****************************************************************************
* Target list creation and searching utilities
*****************************************************************************/
@@ -759,3 +770,191 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target)
i++;
}
}
+
+/*
+ * split_pathtarget_at_srfs
+ * Split given PathTarget into multiple levels to position SRFs safely
+ *
+ * The executor can only handle set-returning functions that appear at the
+ * top level of the targetlist of a ProjectSet plan node. If we have any SRFs
+ * that are not at top level, we need to split up the evaluation into multiple
+ * plan levels in which each level satisfies this constraint. This function
+ * creates appropriate PathTarget(s) for each level.
+ *
+ * As an example, consider the tlist expression
+ * x + srf1(srf2(y + z))
+ * This expression should appear as-is in the top PathTarget, but below that
+ * we must have a PathTarget containing
+ * x, srf1(srf2(y + z))
+ * and below that, another PathTarget containing
+ * x, srf2(y + z)
+ * and below that, another PathTarget containing
+ * x, y, z
+ * When these tlists are processed by setrefs.c, subexpressions that match
+ * output expressions of the next lower tlist will be replaced by Vars,
+ * so that what the executor gets are tlists looking like
+ * Var1 + Var2
+ * Var1, srf1(Var2)
+ * Var1, srf2(Var2 + Var3)
+ * x, y, z
+ * which satisfy the desired property.
+ *
+ * In some cases, a SRF has already been evaluated in some previous plan level
+ * and we shouldn't expand it again (that is, what we see in the target is
+ * already meant as a reference to a lower subexpression). So, don't expand
+ * any tlist expressions that appear in input_target, if that's not NULL.
+ * In principle we might need to consider matching subexpressions to
+ * input_target, but for now it's not necessary because only ORDER BY and
+ * GROUP BY expressions are at issue and those will look the same at both
+ * plan levels.
+ *
+ * The outputs of this function are two parallel lists, one a list of
+ * PathTargets and the other an integer list of bool flags indicating
+ * whether the corresponding PathTarget contains any top-level SRFs.
+ * The lists are given in the order they'd need to be evaluated in, with
+ * the "lowest" PathTarget first. So the last list entry is always the
+ * originally given PathTarget, and any entries before it indicate evaluation
+ * levels that must be inserted below it. The first list entry must not
+ * contain any SRFs, since it will typically be attached to a plan node
+ * that cannot evaluate SRFs.
+ *
+ * Note: using a list for the flags may seem like overkill, since there
+ * are only a few possible patterns for which levels contain SRFs.
+ * But this representation decouples callers from that knowledge.
+ */
+void
+split_pathtarget_at_srfs(PlannerInfo *root,
+ PathTarget *target, PathTarget *input_target,
+ List **targets, List **targets_contain_srfs)
+{
+ /* Initialize output lists to empty; we prepend to them within loop */
+ *targets = *targets_contain_srfs = NIL;
+
+ /* Loop to consider each level of PathTarget we need */
+ for (;;)
+ {
+ bool target_contains_srfs = false;
+ split_pathtarget_context context;
+ ListCell *lc;
+
+ context.nextlevel_tlist = NIL;
+ context.nextlevel_contains_srfs = false;
+
+ /*
+ * Scan the PathTarget looking for SRFs. Top-level SRFs are handled
+ * in this loop, ones lower down are found by split_pathtarget_walker.
+ */
+ foreach(lc, target->exprs)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ /*
+ * A tlist item that is just a reference to an expression already
+ * computed in input_target need not be evaluated here, so just
+ * make sure it's included in the next PathTarget.
+ */
+ if (input_target && list_member(input_target->exprs, node))
+ {
+ context.nextlevel_tlist = lappend(context.nextlevel_tlist, node);
+ continue;
+ }
+
+ /* Else, we need to compute this expression. */
+ if (IsA(node, FuncExpr) &&
+ ((FuncExpr *) node)->funcretset)
+ {
+ /* Top-level SRF: it can be evaluated here */
+ target_contains_srfs = true;
+ /* Recursively examine SRF's inputs */
+ split_pathtarget_walker((Node *) ((FuncExpr *) node)->args,
+ &context);
+ }
+ else if (IsA(node, OpExpr) &&
+ ((OpExpr *) node)->opretset)
+ {
+ /* Same as above, but for set-returning operator */
+ target_contains_srfs = true;
+ split_pathtarget_walker((Node *) ((OpExpr *) node)->args,
+ &context);
+ }
+ else
+ {
+ /* Not a top-level SRF, so recursively examine expression */
+ split_pathtarget_walker(node, &context);
+ }
+ }
+
+ /*
+ * Prepend current target and associated flag to output lists.
+ */
+ *targets = lcons(target, *targets);
+ *targets_contain_srfs = lcons_int(target_contains_srfs,
+ *targets_contain_srfs);
+
+ /*
+ * Done if we found no SRFs anywhere in this target; the tentative
+ * tlist we built for the next level can be discarded.
+ */
+ if (!target_contains_srfs && !context.nextlevel_contains_srfs)
+ break;
+
+ /*
+ * Else build the next PathTarget down, and loop back to process it.
+ * Copy the subexpressions to make sure PathTargets don't share
+ * substructure (might be unnecessary, but be safe); and drop any
+ * duplicate entries in the sub-targetlist.
+ */
+ target = create_empty_pathtarget();
+ add_new_columns_to_pathtarget(target,
+ (List *) copyObject(context.nextlevel_tlist));
+ set_pathtarget_cost_width(root, target);
+ }
+}
+
+/* Recursively examine expressions for split_pathtarget_at_srfs */
+static bool
+split_pathtarget_walker(Node *node, split_pathtarget_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, Var) ||
+ IsA(node, PlaceHolderVar) ||
+ IsA(node, Aggref) ||
+ IsA(node, GroupingFunc) ||
+ IsA(node, WindowFunc))
+ {
+ /*
+ * Pass these items down to the child plan level for evaluation.
+ *
+ * We assume that these constructs cannot contain any SRFs (if one
+ * does, there will be an executor failure from a misplaced SRF).
+ */
+ context->nextlevel_tlist = lappend(context->nextlevel_tlist, node);
+
+ /* Having done that, we need not examine their sub-structure */
+ return false;
+ }
+ else if ((IsA(node, FuncExpr) &&
+ ((FuncExpr *) node)->funcretset) ||
+ (IsA(node, OpExpr) &&
+ ((OpExpr *) node)->opretset))
+ {
+ /*
+ * Pass SRFs down to the child plan level for evaluation, and mark
+ * that it contains SRFs. (We are not at top level of our own tlist,
+ * else this would have been picked up by split_pathtarget_at_srfs.)
+ */
+ context->nextlevel_tlist = lappend(context->nextlevel_tlist, node);
+ context->nextlevel_contains_srfs = true;
+
+ /* Inputs to the SRF need not be considered here, so we're done */
+ return false;
+ }
+
+ /*
+ * Otherwise, the node is evaluatable within the current PathTarget, so
+ * recurse to examine its inputs.
+ */
+ return expression_tree_walker(node, split_pathtarget_walker,
+ (void *) context);
+}
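An end-to-end view of the split performed by split_pathtarget_at_srfs (a sketch; EXPLAIN output from an actual build may differ in detail): an SRF buried inside a scalar expression is pulled out to its own ProjectSet level, and the enclosing expression becomes a plain projection above it:

EXPLAIN (VERBOSE, COSTS OFF) SELECT generate_series(1, 3) + 1;
--  Result
--    Output: ((generate_series(1, 3)) + 1)
--    ->  ProjectSet
--          Output: generate_series(1, 3)
--          ->  Result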
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index b9c7f72903..d424031676 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -253,6 +253,10 @@ extern Tuplestorestate *ExecMakeTableFunctionResult(ExprState *funcexpr,
MemoryContext argContext,
TupleDesc expectedDesc,
bool randomAccess);
+extern Datum ExecMakeFunctionResultSet(FuncExprState *fcache,
+ ExprContext *econtext,
+ bool *isNull,
+ ExprDoneCond *isDone);
extern Datum ExecEvalExprSwitchContext(ExprState *expression, ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
extern ExprState *ExecInitExpr(Expr *node, PlanState *parent);
diff --git a/src/include/executor/nodeProjectSet.h b/src/include/executor/nodeProjectSet.h
new file mode 100644
index 0000000000..30b2b7cec9
--- /dev/null
+++ b/src/include/executor/nodeProjectSet.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeProjectSet.h
+ *
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/executor/nodeProjectSet.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NODEPROJECTSET_H
+#define NODEPROJECTSET_H
+
+#include "nodes/execnodes.h"
+
+extern ProjectSetState *ExecInitProjectSet(ProjectSet *node, EState *estate, int eflags);
+extern TupleTableSlot *ExecProjectSet(ProjectSetState *node);
+extern void ExecEndProjectSet(ProjectSetState *node);
+extern void ExecReScanProjectSet(ProjectSetState *node);
+
+#endif /* NODEPROJECTSET_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index ce13bf7635..1da1e1f804 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -696,7 +696,7 @@ typedef struct FuncExprState
/*
* Function manager's lookup info for the target function. If func.fn_oid
* is InvalidOid, we haven't initialized it yet (nor any of the following
- * fields).
+ * fields, except funcReturnsSet).
*/
FmgrInfo func;
@@ -716,6 +716,12 @@ typedef struct FuncExprState
bool funcReturnsTuple; /* valid when funcResultDesc isn't
* NULL */
+ /*
+ * Remember whether the function is declared to return a set. This is set
+ * by ExecInitExpr, and is valid even before the FmgrInfo is set up.
+ */
+ bool funcReturnsSet;
+
/*
* setArgsValid is true when we are evaluating a set-returning function
* that uses value-per-call mode and we are in the middle of a call
@@ -1129,6 +1135,18 @@ typedef struct ResultState
bool rs_checkqual; /* do we need to check the qual? */
} ResultState;
+/* ----------------
+ * ProjectSetState information
+ * ----------------
+ */
+typedef struct ProjectSetState
+{
+ PlanState ps; /* its first field is NodeTag */
+ ExprDoneCond *elemdone; /* array of per-SRF is-done states */
+ int nelems; /* length of elemdone[] array */
+ bool pending_srf_tuples; /* still evaluating srfs in tlist? */
+} ProjectSetState;
+
/* ----------------
* ModifyTableState information
* ----------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 4c4319bcab..d65958153d 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -43,6 +43,7 @@ typedef enum NodeTag
*/
T_Plan,
T_Result,
+ T_ProjectSet,
T_ModifyTable,
T_Append,
T_MergeAppend,
@@ -91,6 +92,7 @@ typedef enum NodeTag
*/
T_PlanState,
T_ResultState,
+ T_ProjectSetState,
T_ModifyTableState,
T_AppendState,
T_MergeAppendState,
@@ -245,6 +247,7 @@ typedef enum NodeTag
T_UniquePath,
T_GatherPath,
T_ProjectionPath,
+ T_ProjectSetPath,
T_SortPath,
T_GroupPath,
T_UpperUniquePath,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 6810f8c099..f72f7a8978 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -176,6 +176,17 @@ typedef struct Result
Node *resconstantqual;
} Result;
+/* ----------------
+ * ProjectSet node -
+ * Apply a projection that includes set-returning functions to the
+ * output tuples of the outer plan.
+ * ----------------
+ */
+typedef struct ProjectSet
+{
+ Plan plan;
+} ProjectSet;
+
/* ----------------
* ModifyTable node -
* Apply rows produced by subplan(s) to result table(s),
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 1e950c4afd..643be54d40 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1304,6 +1304,17 @@ typedef struct ProjectionPath
bool dummypp; /* true if no separate Result is needed */
} ProjectionPath;
+/*
+ * ProjectSetPath represents evaluation of a targetlist that includes
+ * set-returning function(s), which will need to be implemented by a
+ * ProjectSet plan node.
+ */
+typedef struct ProjectSetPath
+{
+ Path path;
+ Path *subpath; /* path representing input source */
+} ProjectSetPath;
+
/*
* SortPath represents an explicit sort step
*
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 6173ef8d75..cc0d7b0a26 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -54,7 +54,6 @@ extern bool contain_window_function(Node *clause);
extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);
extern double expression_returns_set_rows(Node *clause);
-extern double tlist_returns_set_rows(List *tlist);
extern bool contain_subplans(Node *clause);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index d16f879fc1..7b41317621 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -144,6 +144,10 @@ extern Path *apply_projection_to_path(PlannerInfo *root,
RelOptInfo *rel,
Path *path,
PathTarget *target);
+extern ProjectSetPath *create_set_projection_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *subpath,
+ PathTarget *target);
extern SortPath *create_sort_path(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index f80b31a673..976024a164 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -61,6 +61,9 @@ extern void add_column_to_pathtarget(PathTarget *target,
extern void add_new_column_to_pathtarget(PathTarget *target, Expr *expr);
extern void add_new_columns_to_pathtarget(PathTarget *target, List *exprs);
extern void apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target);
+extern void split_pathtarget_at_srfs(PlannerInfo *root,
+ PathTarget *target, PathTarget *input_target,
+ List **targets, List **targets_contain_srfs);
/* Convenience macro to get a PathTarget with valid cost/width fields */
#define create_pathtarget(root, tlist) \
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index fa1f5e7879..0ff80620cc 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -822,8 +822,9 @@ explain (costs off)
-> Limit
-> Index Only Scan Backward using tenk1_unique2 on tenk1
Index Cond: (unique2 IS NOT NULL)
- -> Result
-(7 rows)
+ -> ProjectSet
+ -> Result
+(8 rows)
select max(unique2), generate_series(1,3) as g from tenk1 order by g desc;
max | g
diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out
index 9c3eecfc3b..65c8c44a9a 100644
--- a/src/test/regress/expected/limit.out
+++ b/src/test/regress/expected/limit.out
@@ -208,13 +208,15 @@ select currval('testseq');
explain (verbose, costs off)
select unique1, unique2, generate_series(1,10)
from tenk1 order by unique2 limit 7;
- QUERY PLAN
-----------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: unique1, unique2, (generate_series(1, 10))
- -> Index Scan using tenk1_unique2 on public.tenk1
+ -> ProjectSet
Output: unique1, unique2, generate_series(1, 10)
-(4 rows)
+ -> Index Scan using tenk1_unique2 on public.tenk1
+ Output: unique1, unique2, two, four, ten, twenty, hundred, thousand, twothousand, fivethous, tenthous, odd, even, stringu1, stringu2, string4
+(6 rows)
select unique1, unique2, generate_series(1,10)
from tenk1 order by unique2 limit 7;
@@ -236,7 +238,7 @@ select unique1, unique2, generate_series(1,10)
--------------------------------------------------------------------
Limit
Output: unique1, unique2, (generate_series(1, 10)), tenthous
- -> Result
+ -> ProjectSet
Output: unique1, unique2, generate_series(1, 10), tenthous
-> Sort
Output: unique1, unique2, tenthous
@@ -263,9 +265,10 @@ explain (verbose, costs off)
select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
- Result
+ ProjectSet
Output: generate_series(0, 2), generate_series(((random() * '0.1'::double precision))::integer, 2)
-(2 rows)
+ -> Result
+(3 rows)
select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2;
s1 | s2
@@ -283,9 +286,10 @@ order by s2 desc;
Sort
Output: (generate_series(0, 2)), (generate_series(((random() * '0.1'::double precision))::integer, 2))
Sort Key: (generate_series(((random() * '0.1'::double precision))::integer, 2)) DESC
- -> Result
+ -> ProjectSet
Output: generate_series(0, 2), generate_series(((random() * '0.1'::double precision))::integer, 2)
-(5 rows)
+ -> Result
+(6 rows)
select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2
order by s2 desc;
diff --git a/src/test/regress/expected/portals.out b/src/test/regress/expected/portals.out
index 3ae918a63c..1b8f7b69d1 100644
--- a/src/test/regress/expected/portals.out
+++ b/src/test/regress/expected/portals.out
@@ -1320,18 +1320,20 @@ fetch backward all in c1;
rollback;
begin;
explain (costs off) declare c2 cursor for select generate_series(1,3) as g;
- QUERY PLAN
-------------
- Result
-(1 row)
-
-explain (costs off) declare c2 scroll cursor for select generate_series(1,3) as g;
QUERY PLAN
--------------
- Materialize
+ ProjectSet
-> Result
(2 rows)
+explain (costs off) declare c2 scroll cursor for select generate_series(1,3) as g;
+ QUERY PLAN
+--------------------
+ Materialize
+ -> ProjectSet
+ -> Result
+(3 rows)
+
declare c2 scroll cursor for select generate_series(1,3) as g;
fetch all in c2;
g
diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out
index 275b66204a..56481de5c3 100644
--- a/src/test/regress/expected/rangefuncs.out
+++ b/src/test/regress/expected/rangefuncs.out
@@ -1995,12 +1995,10 @@ SELECT *,
END)
FROM
(VALUES (1,''), (2,'0000000049404'), (3,'FROM 10000000876')) v(id, str);
- id | str | lower
-----+------------------+------------------
- 1 | |
- 2 | 0000000049404 | 49404
- 3 | FROM 10000000876 | from 10000000876
-(3 rows)
+ id | str | lower
+----+---------------+-------
+ 2 | 0000000049404 | 49404
+(1 row)
-- check whole-row-Var handling in nested lateral functions (bug #11703)
create function extractq2(t int8_tbl) returns int8 as $$
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index eda319d24b..abd3217e86 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -807,24 +807,28 @@ select * from int4_tbl where
explain (verbose, costs off)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
- QUERY PLAN
-----------------------------------------------------------------
- Hash Semi Join
+ QUERY PLAN
+-------------------------------------------------------------------
+ Nested Loop Semi Join
Output: o.f1
- Hash Cond: (o.f1 = "ANY_subquery".f1)
+ Join Filter: (o.f1 = "ANY_subquery".f1)
-> Seq Scan on public.int4_tbl o
Output: o.f1
- -> Hash
+ -> Materialize
Output: "ANY_subquery".f1, "ANY_subquery".g
-> Subquery Scan on "ANY_subquery"
Output: "ANY_subquery".f1, "ANY_subquery".g
Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
- -> HashAggregate
- Output: i.f1, (generate_series(1, 2) / 10)
- Group Key: i.f1
- -> Seq Scan on public.int4_tbl i
- Output: i.f1
-(15 rows)
+ -> Result
+ Output: i.f1, ((generate_series(1, 2)) / 10)
+ -> ProjectSet
+ Output: i.f1, generate_series(1, 2)
+ -> HashAggregate
+ Output: i.f1
+ Group Key: i.f1
+ -> Seq Scan on public.int4_tbl i
+ Output: i.f1
+(19 rows)
select * from int4_tbl o where (f1, f1) in
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
@@ -899,9 +903,10 @@ select * from
Subquery Scan on ss
Output: x, u
Filter: tattle(ss.x, 8)
- -> Result
+ -> ProjectSet
Output: 9, unnest('{1,2,3,11,12,13}'::integer[])
-(5 rows)
+ -> Result
+(6 rows)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
@@ -930,10 +935,11 @@ select * from
where tattle(x, 8);
QUERY PLAN
----------------------------------------------------
- Result
+ ProjectSet
Output: 9, unnest('{1,2,3,11,12,13}'::integer[])
- One-Time Filter: tattle(9, 8)
-(3 rows)
+ -> Result
+ One-Time Filter: tattle(9, 8)
+(4 rows)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
@@ -959,9 +965,10 @@ select * from
Subquery Scan on ss
Output: x, u
Filter: tattle(ss.x, ss.u)
- -> Result
+ -> ProjectSet
Output: 9, unnest('{1,2,3,11,12,13}'::integer[])
-(5 rows)
+ -> Result
+(6 rows)
select * from
(select 9 as x, unnest(array[1,2,3,11,12,13]) as u) ss
diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out
index 7bb6d17fcb..8c47f0f668 100644
--- a/src/test/regress/expected/tsrf.out
+++ b/src/test/regress/expected/tsrf.out
@@ -25,8 +25,8 @@ SELECT generate_series(1, 2), generate_series(1,4);
-----------------+-----------------
1 | 1
2 | 2
- 1 | 3
- 2 | 4
+ | 3
+ | 4
(4 rows)
-- srf, with SRF argument
@@ -43,7 +43,16 @@ SELECT generate_series(1, generate_series(1, 3));
-- srf, with two SRF arguments
SELECT generate_series(generate_series(1,3), generate_series(2, 4));
-ERROR: functions and operators can take at most one set argument
+ generate_series
+-----------------
+ 1
+ 2
+ 2
+ 3
+ 3
+ 4
+(6 rows)
+
CREATE TABLE few(id int, dataa text, datab text);
INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar');
-- SRF output order of sorting is maintained, if SRF is not referenced
@@ -118,15 +127,15 @@ SELECT few.dataa, count(*), min(id), max(id), unnest('{1,1,3}'::int[]) FROM few
SELECT few.dataa, count(*), min(id), max(id), unnest('{1,1,3}'::int[]) FROM few WHERE few.id = 1 GROUP BY few.dataa, unnest('{1,1,3}'::int[]);
dataa | count | min | max | unnest
-------+-------+-----+-----+--------
- a | 2 | 1 | 1 | 1
a | 1 | 1 | 1 | 3
+ a | 2 | 1 | 1 | 1
(2 rows)
SELECT few.dataa, count(*), min(id), max(id), unnest('{1,1,3}'::int[]) FROM few WHERE few.id = 1 GROUP BY few.dataa, 5;
dataa | count | min | max | unnest
-------+-------+-----+-----+--------
- a | 2 | 1 | 1 | 1
a | 1 | 1 | 1 | 3
+ a | 2 | 1 | 1 | 1
(2 rows)
-- check HAVING works when GROUP BY does [not] reference SRF output
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 67f5fc4361..d22db69c7d 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -636,9 +636,10 @@ ORDER BY x;
-> HashAggregate
Group Key: (1), (generate_series(1, 10))
-> Append
+ -> ProjectSet
+ -> Result
-> Result
- -> Result
-(9 rows)
+(10 rows)
SELECT * FROM
(SELECT 1 AS t, generate_series(1,10) AS x
--
cgit v1.2.3
From f13a1277aa2df301583c6db9c2989d2e9d7f6483 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 18 Jan 2017 18:10:23 -0500
Subject: Doc: improve documentation of new SRF-in-tlist behavior.
Correct a misstatement about how things used to work: we did allow nested
SRFs before, as long as no function had more than one set-returning input.
Also, attempt to document the fact that the new implementation changes the
behavior for SRFs within conditional constructs (eg CASE): the conditional
construct no longer gates whether the SRF is run, and thus cannot affect
the number of rows emitted. We might want to change this behavior, but
first it behooves us to see if we can explain it.
Minor other wordsmithing on what I wrote yesterday, too.
Discussion: https://postgr.es/m/20170118214702.54b2mdbxce5piwv5@alap3.anarazel.de
---
doc/src/sgml/xfunc.sgml | 57 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 47 insertions(+), 10 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 09427bbed2..255bfddad7 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -998,7 +998,7 @@ SELECT name, listchildren(name) FROM nodes;
- If there is more than one set-returning function in the same select
+ If there is more than one set-returning function in the query's select
list, the behavior is similar to what you get from putting the functions
into a single LATERAL ROWS FROM( ... ) FROM-clause
item. For each row from the underlying query, there is an output row
@@ -1007,21 +1007,53 @@ SELECT name, listchildren(name) FROM nodes;
produce fewer outputs than others, null values are substituted for the
missing data, so that the total number of rows emitted for one
underlying row is the same as for the set-returning function that
- produced the most outputs.
+ produced the most outputs. Thus the set-returning functions
+ run in lockstep until they are all exhausted, and then
+ execution continues with the next underlying row.
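(An illustration of the lockstep-with-null-padding rule just described, matching the tsrf regression expectations updated by the executor commit above:)

SELECT generate_series(1, 2) AS a, generate_series(1, 4) AS b;
--  a | b
--  1 | 1
--  2 | 2
--    | 3     -- the two-row SRF is exhausted, so a is null-padded
--    | 4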
Set-returning functions can be nested in a select list, although that is
not allowed in FROM-clause items. In such cases, each level
of nesting is treated separately, as though it were
- another LATERAL ROWS FROM( ... ) item. For example, in
+ a separate LATERAL ROWS FROM( ... ) item. For example, in
-SELECT srf1(srf2(x), srf3(y)), srf4(srf5(z)) FROM ...
+SELECT srf1(srf2(x), srf3(y)), srf4(srf5(z)) FROM tab;
the set-returning functions srf2, srf3,
- and srf5 would be run in lockstep for each row of the
- underlying query, and then srf1 and srf4 would
- be applied in lockstep to each row produced by the lower functions.
+ and srf5 would be run in lockstep for each row
+ of tab, and then srf1 and srf4
+ would be applied in lockstep to each row produced by the lower
+ functions.
+
+
+
+ This behavior also means that set-returning functions will be evaluated
+ even when it might appear that they should be skipped because of a
+ conditional-evaluation construct, such as CASE
+ or COALESCE. For example, consider
+
+SELECT x, CASE WHEN x > 0 THEN generate_series(1, 5) ELSE 0 END FROM tab;
+
+ It might seem that this should produce five repetitions of input
+ rows that have x > 0, and a single repetition of those
+ that do not; but actually it will produce five repetitions of every
+ input row. This is because generate_series() is run first,
+ and then the CASE expression is applied to its result rows.
+ The behavior is thus comparable to
+
+SELECT x, CASE WHEN x > 0 THEN g ELSE 0 END
+ FROM tab, LATERAL generate_series(1,5) AS g;
+
+ It would be exactly the same, except that in this specific example,
+ the planner could choose to put g on the outside of the
+ nestloop join, since g has no actual lateral dependency
+ on tab. That would result in a different output row
+ order. Set-returning functions in the select list are always evaluated
+ as though they are on the inside of a nestloop join with the rest of
+ the FROM clause, so that the function(s) are run to
+ completion before the next row from the FROM clause is
+ considered.
@@ -1043,9 +1075,14 @@ SELECT srf1(srf2(x), srf3(y)), srf4(srf5(z)) FROM ...
sensibly unless they always produced equal numbers of rows. Otherwise,
what you got was a number of output rows equal to the least common
multiple of the numbers of rows produced by the set-returning
- functions. Furthermore, nested set-returning functions did not work at
- all. Use of the LATERAL syntax is recommended when writing
- queries that need to work in older PostgreSQL versions.
+ functions. Also, nested set-returning functions did not work as
+ described above; instead, a set-returning function could have at most
+ one set-returning argument, and each nest of set-returning functions
+ was run independently. The behavior for conditional execution
+ (set-returning functions inside CASE etc) was different too.
+ Use of the LATERAL syntax is recommended when writing
+ queries that need to work in older PostgreSQL versions,
+ because that will give consistent results across different versions.
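A hedged before/after sketch of the version difference described above:

SELECT generate_series(1, 2), generate_series(1, 3);
-- PostgreSQL 9.6 and earlier: LCM(2, 3) = 6 output rows
-- PostgreSQL 10: max(2, 3) = 3 output rows, with nulls padding
-- the shorter set-returning function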
--
cgit v1.2.3
From 665d1fad99e7b11678b0d5fa24d2898424243cd6 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 19 Jan 2017 12:00:00 -0500
Subject: Logical replication
- Add PUBLICATION catalogs and DDL
- Add SUBSCRIPTION catalog and DDL
- Define logical replication protocol and output plugin
- Add logical replication workers
From: Petr Jelinek
Reviewed-by: Steve Singer
Reviewed-by: Andres Freund
Reviewed-by: Erik Rijkers
Reviewed-by: Peter Eisentraut
---
doc/src/sgml/catalogs.sgml | 309 +++++
doc/src/sgml/config.sgml | 41 +
doc/src/sgml/filelist.sgml | 1 +
doc/src/sgml/func.sgml | 2 +-
doc/src/sgml/logical-replication.sgml | 396 ++++++
doc/src/sgml/monitoring.sgml | 74 +
doc/src/sgml/postgres.sgml | 1 +
doc/src/sgml/protocol.sgml | 721 ++++++++++
doc/src/sgml/ref/allfiles.sgml | 6 +
doc/src/sgml/ref/alter_publication.sgml | 139 ++
doc/src/sgml/ref/alter_subscription.sgml | 139 ++
doc/src/sgml/ref/create_publication.sgml | 206 +++
doc/src/sgml/ref/create_subscription.sgml | 176 +++
doc/src/sgml/ref/drop_publication.sgml | 107 ++
doc/src/sgml/ref/drop_subscription.sgml | 110 ++
doc/src/sgml/ref/pg_dump.sgml | 21 +
doc/src/sgml/ref/psql-ref.sgml | 28 +
doc/src/sgml/reference.sgml | 6 +
src/Makefile | 1 +
src/backend/access/transam/xact.c | 2 +
src/backend/catalog/Makefile | 7 +-
src/backend/catalog/aclchk.c | 57 +
src/backend/catalog/catalog.c | 8 +-
src/backend/catalog/dependency.c | 24 +
src/backend/catalog/objectaddress.c | 219 +++
src/backend/catalog/pg_publication.c | 457 +++++++
src/backend/catalog/pg_shdepend.c | 11 +
src/backend/catalog/pg_subscription.c | 207 +++
src/backend/catalog/system_views.sql | 25 +
src/backend/commands/Makefile | 8 +-
src/backend/commands/alter.c | 10 +
src/backend/commands/dbcommands.c | 17 +
src/backend/commands/define.c | 28 +
src/backend/commands/dropcmds.c | 4 +
src/backend/commands/event_trigger.c | 8 +
src/backend/commands/publicationcmds.c | 754 +++++++++++
src/backend/commands/subscriptioncmds.c | 643 +++++++++
src/backend/commands/tablecmds.c | 12 +
src/backend/executor/Makefile | 2 +-
src/backend/executor/execMain.c | 3 +-
src/backend/executor/execReplication.c | 553 ++++++++
src/backend/nodes/copyfuncs.c | 78 ++
src/backend/nodes/equalfuncs.c | 73 +
src/backend/parser/gram.y | 262 +++-
src/backend/postmaster/bgworker.c | 23 +-
src/backend/postmaster/pgstat.c | 6 +
src/backend/postmaster/postmaster.c | 9 +
.../libpqwalreceiver/libpqwalreceiver.c | 217 ++-
src/backend/replication/logical/Makefile | 4 +-
src/backend/replication/logical/launcher.c | 759 +++++++++++
src/backend/replication/logical/proto.c | 637 +++++++++
src/backend/replication/logical/relation.c | 489 +++++++
src/backend/replication/logical/worker.c | 1429 ++++++++++++++++++++
src/backend/replication/pgoutput/Makefile | 32 +
src/backend/replication/pgoutput/pgoutput.c | 596 ++++++++
src/backend/replication/walreceiver.c | 20 +-
src/backend/storage/ipc/ipci.c | 3 +
src/backend/storage/lmgr/lwlocknames.txt | 2 +
src/backend/tcop/utility.c | 83 ++
src/backend/utils/cache/inval.c | 35 +-
src/backend/utils/cache/relcache.c | 136 +-
src/backend/utils/cache/syscache.c | 69 +
src/backend/utils/misc/guc.c | 13 +
src/backend/utils/misc/postgresql.conf.sample | 1 +
src/bin/pg_dump/common.c | 12 +
src/bin/pg_dump/pg_backup.h | 3 +
src/bin/pg_dump/pg_backup_archiver.c | 7 +-
src/bin/pg_dump/pg_dump.c | 464 +++++++
src/bin/pg_dump/pg_dump.h | 46 +-
src/bin/pg_dump/pg_dump_sort.c | 20 +-
src/bin/pg_dump/pg_restore.c | 3 +
src/bin/pg_dump/t/002_pg_dump.pl | 76 +-
src/bin/psql/command.c | 16 +
src/bin/psql/describe.c | 292 ++++
src/bin/psql/describe.h | 9 +
src/bin/psql/help.c | 2 +
src/bin/psql/tab-complete.c | 53 +-
src/include/catalog/dependency.h | 3 +
src/include/catalog/indexing.h | 18 +
src/include/catalog/pg_proc.h | 6 +
src/include/catalog/pg_publication.h | 104 ++
src/include/catalog/pg_publication_rel.h | 52 +
src/include/catalog/pg_subscription.h | 83 ++
src/include/commands/defrem.h | 1 +
src/include/commands/publicationcmds.h | 28 +
src/include/commands/subscriptioncmds.h | 27 +
src/include/executor/executor.h | 17 +
src/include/nodes/nodes.h | 5 +
src/include/nodes/parsenodes.h | 51 +
src/include/parser/kwlist.h | 3 +
src/include/pgstat.h | 4 +-
src/include/replication/logicallauncher.h | 27 +
src/include/replication/logicalproto.h | 104 ++
src/include/replication/logicalrelation.h | 43 +
src/include/replication/logicalworker.h | 17 +
src/include/replication/pgoutput.h | 29 +
src/include/replication/walreceiver.h | 60 +-
src/include/replication/worker_internal.h | 62 +
src/include/storage/sinval.h | 3 +-
src/include/utils/acl.h | 4 +
src/include/utils/inval.h | 2 +
src/include/utils/rel.h | 6 +
src/include/utils/relcache.h | 6 +
src/include/utils/syscache.h | 6 +
src/test/Makefile | 2 +-
src/test/perl/PostgresNode.pm | 13 +-
src/test/regress/expected/publication.out | 156 +++
src/test/regress/expected/rules.out | 18 +
src/test/regress/expected/sanity_check.out | 3 +
src/test/regress/expected/subscription.out | 66 +
src/test/regress/parallel_schedule | 3 +
src/test/regress/serial_schedule | 2 +
src/test/regress/sql/publication.sql | 82 ++
src/test/regress/sql/subscription.sql | 44 +
src/test/subscription/.gitignore | 2 +
src/test/subscription/Makefile | 22 +
src/test/subscription/README | 16 +
src/test/subscription/t/001_rep_changes.pl | 188 +++
src/test/subscription/t/002_types.pl | 539 ++++++++
119 files changed, 13354 insertions(+), 95 deletions(-)
create mode 100644 doc/src/sgml/logical-replication.sgml
create mode 100644 doc/src/sgml/ref/alter_publication.sgml
create mode 100644 doc/src/sgml/ref/alter_subscription.sgml
create mode 100644 doc/src/sgml/ref/create_publication.sgml
create mode 100644 doc/src/sgml/ref/create_subscription.sgml
create mode 100644 doc/src/sgml/ref/drop_publication.sgml
create mode 100644 doc/src/sgml/ref/drop_subscription.sgml
create mode 100644 src/backend/catalog/pg_publication.c
create mode 100644 src/backend/catalog/pg_subscription.c
create mode 100644 src/backend/commands/publicationcmds.c
create mode 100644 src/backend/commands/subscriptioncmds.c
create mode 100644 src/backend/executor/execReplication.c
create mode 100644 src/backend/replication/logical/launcher.c
create mode 100644 src/backend/replication/logical/proto.c
create mode 100644 src/backend/replication/logical/relation.c
create mode 100644 src/backend/replication/logical/worker.c
create mode 100644 src/backend/replication/pgoutput/Makefile
create mode 100644 src/backend/replication/pgoutput/pgoutput.c
create mode 100644 src/include/catalog/pg_publication.h
create mode 100644 src/include/catalog/pg_publication_rel.h
create mode 100644 src/include/catalog/pg_subscription.h
create mode 100644 src/include/commands/publicationcmds.h
create mode 100644 src/include/commands/subscriptioncmds.h
create mode 100644 src/include/replication/logicallauncher.h
create mode 100644 src/include/replication/logicalproto.h
create mode 100644 src/include/replication/logicalrelation.h
create mode 100644 src/include/replication/logicalworker.h
create mode 100644 src/include/replication/pgoutput.h
create mode 100644 src/include/replication/worker_internal.h
create mode 100644 src/test/regress/expected/publication.out
create mode 100644 src/test/regress/expected/subscription.out
create mode 100644 src/test/regress/sql/publication.sql
create mode 100644 src/test/regress/sql/subscription.sql
create mode 100644 src/test/subscription/.gitignore
create mode 100644 src/test/subscription/Makefile
create mode 100644 src/test/subscription/README
create mode 100644 src/test/subscription/t/001_rep_changes.pl
create mode 100644 src/test/subscription/t/002_types.pl
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 493050618d..7c758a5081 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -240,6 +240,16 @@
functions and procedures
+
+ pg_publication
+ publications for logical replication
+
+
+
+ pg_publication_rel
+ relation to publication mapping
+
+
pg_range
information about range types
@@ -285,6 +295,11 @@
planner statistics
+
+ pg_subscription
+ logical replication subscriptions
+
+
pg_tablespace
tablespaces within this database cluster
@@ -5271,6 +5286,137 @@
+
+ pg_publication
+
+
+ pg_publication
+
+
+
+ The catalog pg_publication contains all
+ publications created in the database. For more on publications see
+ .
+
+
+
+ pg_publication Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+ oid
+ oid
+
+ Row identifier (hidden attribute; must be explicitly selected)
+
+
+
+ pubname
+ Name
+
+ Name of the publication
+
+
+
+ pubowner
+ oid
+ pg_authid.oid
+ Owner of the publication
+
+
+
+ puballtables
+ bool
+
+ If true, this publication automatically includes all tables
+ in the database, including any that will be created in the future.
+
+
+
+
+ pubinsert
+ bool
+
+ If true, INSERT operations are replicated for
+ tables in the publication.
+
+
+
+ pubupdate
+ bool
+
+ If true, UPDATE operations are replicated for
+ tables in the publication.
+
+
+
+ pubdelete
+ bool
+
+ If true, DELETE operations are replicated for
+ tables in the publication.
+
+
+
+
+
+
+
+ pg_publication_rel
+
+
+ pg_publication_rel
+
+
+
+ The catalog pg_publication_rel contains the
+ mapping between relations and publications in the database. This is a
+ many-to-many mapping. See also
+ for a more user-friendly view of this information.
+
+
+
+ pg_publication_rel Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+ prpubid
+ oid
+ pg_publication.oid
+ Reference to publication
+
+
+
+ prrelid
+ oid
+ pg_class.oid
+ Reference to relation
+
+
+
+
+
+
pg_range
@@ -6150,6 +6296,109 @@
+
+ pg_subscription
+
+
+ pg_subscription
+
+
+
+ The catalog pg_subscription contains all existing
+ logical replication subscriptions. For more information about logical
+ replication see .
+
+
+
+ Unlike most system catalogs, pg_subscription is
+ shared across all databases of a cluster: There is only one copy
+ of pg_subscription per cluster, not one per
+ database.
+
+
+
+ Access to this catalog is restricted from normal users. Normal users can
+ use the view to get some information
+ about subscriptions.
+
+
+
+ pg_subscription Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+ oid
+ oid
+
+ Row identifier (hidden attribute; must be explicitly selected)
+
+
+
+ subdbid
+ oid
+ pg_database.oid
+ OID of the database in which the subscription resides
+
+
+
+ subname
+ name
+
+ Name of the subscription
+
+
+
+ subowner
+ oid
+ pg_authid.oid
+ Owner of the subscription
+
+
+
+ subenabled
+ bool
+
+ If true, the subscription is enabled and should be replicating.
+
+
+
+ subconninfo
+ text
+
+ Connection string to the upstream database
+
+
+
+ subslotname
+ name
+
+ Name of the replication slot in the upstream database. Also used
+ for the local replication origin name.
+
+
+
+ subpublications
+ text[]
+
+ Array of subscribed publication names. These reference the
+ publications on the publisher server. For more on publications
+ see .
+
+
+
+
+
+ pg_tablespace
@@ -7589,6 +7838,11 @@
prepared transactions
+
+ pg_publication_tables
+ publications and their associated tables
+
+
pg_replication_origin_status
information about replication origins, including replication progress
@@ -8871,6 +9125,61 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
+
+ pg_publication_tables
+
+
+ pg_publication_tables
+
+
+
+ The view pg_publication_tables provides
+ information about the mapping between publications and the tables they
+ contain. Unlike the underlying
+ catalog pg_publication_rel, this view expands
+ publications defined as FOR ALL TABLES, so for such
+ publications there will be a row for each eligible table.
+
+
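+
+ For example (a sketch; the publication name mypub is
+ hypothetical), the expanded table list of a publication can be inspected
+ with:
+
+SELECT * FROM pg_publication_tables WHERE pubname = 'mypub';
+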
+
+ pg_publication_tables Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+ pubname
+ name
+ pg_publication.pubname
+ Name of publication
+
+
+
+ schemaname
+ name
+ pg_namespace.nspname
+ Name of schema containing table
+
+
+
+ tablename
+ name
+ pg_class.relname
+ Name of table
+
+
+
+
+
+
pg_replication_origin_status
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 07afa3c77a..fb5d6473ef 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3411,6 +3411,47 @@ ANY num_sync (
+ Subscribers
+
+
+ These settings control the behavior of a logical replication subscriber.
+ Their values on the publisher are irrelevant.
+
+
+
+ Note that the wal_receiver_timeout and
+ wal_retrieve_retry_interval configuration parameters
+ affect the logical replication workers as well.
+
+
+
+
+
+ max_logical_replication_workers (int)
+
+ max_logical_replication_workers> configuration parameter
+
+
+
+
+ Specifies the maximum number of logical replication workers. This includes
+ both apply workers and table synchronization workers.
+
+
+ Logical replication workers are taken from the pool defined by
+ max_worker_processes.
+
+
+ The default value is 4.
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 69649a7da4..2624c627dc 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -50,6 +50,7 @@
+
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 2504a466e6..b214218791 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18762,7 +18762,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
-
+ pg_replication_origin_advance
diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
new file mode 100644
index 0000000000..9312c0c9a0
--- /dev/null
+++ b/doc/src/sgml/logical-replication.sgml
@@ -0,0 +1,396 @@
+
+
+
+ Logical Replication
+
+
+ Logical replication is a method of replicating data objects and their
+ changes, based upon their replication identity (usually a primary key). We
+ use the term logical in contrast to physical replication, which uses exact
+ block addresses and byte-by-byte replication. PostgreSQL supports both
+ mechanisms concurrently; see . Logical
+ replication allows fine-grained control over both data replication and
+ security.
+
+
+
+ Logical replication uses a publish
+ and subscribe model with one or
+ more subscribers subscribing to one or more
+ publications on a publisher
+ node. Subscribers pull data from the publications they subscribe to and may
+ subsequently re-publish data to allow cascading replication or more complex
+ configurations.
+
+
+
+ Logical replication sends the changes on the publisher to the subscriber as
+ they occur in real-time. The subscriber applies the data in the same order
+ as the publisher so that transactional consistency is guaranteed for
+ publications within a single subscription. This method of data replication
+ is sometimes referred to as transactional replication.
+
+
+
+ The typical use-cases for logical replication are:
+
+
+
+
+ Sending incremental changes in a single database or a subset of a
+ database to subscribers as they occur.
+
+
+
+
+
+ Firing triggers for individual changes as they arrive on the
+ subscriber.
+
+
+
+
+
+ Consolidating multiple databases into a single one (for example for
+ analytical purposes).
+
+
+
+
+
+ Replicating between different major versions of PostgreSQL.
+
+
+
+
+
+ Giving access to replicated data to different groups of users.
+
+
+
+
+
+ Sharing a subset of the database between multiple databases.
+
+
+
+
+
+
+ The subscriber database behaves in the same way as any other PostgreSQL
+ instance and can be used as a publisher for other databases by defining its
+ own publications. When the subscriber is treated as read-only by the
+ application, there will be no conflicts from a single subscription. On the
+ other hand, if there are other writes done either by the application or by
+ other subscribers to the same set of tables, conflicts can arise.
+
+
+
+ Publication
+
+
+ A publication object can be defined on any physical
+ replication master. The node where a publication is defined is referred to
+ as publisher. A publication is a set of changes
+ generated from a group of tables, and might also be described as a change
+ set or replication set. Each publication exists in only one database.
+
+
+
+ Publications are different from schemas and do not affect how the table is
+ accessed. Each table can be added to multiple publications if needed.
+ Publications may currently only contain tables. Objects must be added
+ explicitly, except when a publication is created for ALL
+ TABLES.
+
+
+
+ Publications can choose to limit the changes they produce to
+ any combination of INSERT, UPDATE, and
+ DELETE, similar to the way triggers are fired by
+ particular event types. If a table without a REPLICA
+ IDENTITY is added to a publication that
+ replicates UPDATE or DELETE
+ operations, then subsequent UPDATE
+ or DELETE operations will fail on the publisher.
+
+
+
+ Every publication can have multiple subscribers.
+
+
+
+ A publication is created using the
+ command and may be later altered or dropped using corresponding commands.
+
+
+
+ The individual tables can be added and removed dynamically using
+ . Both the ADD
+ TABLE and DROP TABLE operations are
+ transactional; so the table will start or stop replicating at the correct
+ snapshot once the transaction has committed.
+
+
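+
+ For example (a sketch; the publication and table names are hypothetical),
+ a table can be added within an explicit transaction and will start
+ replicating at the commit snapshot:
+
+BEGIN;
+ALTER PUBLICATION mypublication ADD TABLE users;
+COMMIT;
+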
+
+
+ Subscription
+
+
+ A subscription is the downstream side of logical
+ replication. The node where a subscription is defined is referred to as
+ the subscriber. A subscription defines the connection
+ to another database and the set of publications (one or more) to which it
+ wants to subscribe.
+
+
+
+ The subscriber database behaves in the same way as any other PostgreSQL
+ instance and can be used as a publisher for other databases by defining its
+ own publications.
+
+
+
+ A subscriber node may have multiple subscriptions if desired. It is
+ possible to define multiple subscriptions between a single
+ publisher-subscriber pair, in which case extra care must be taken to ensure
+ that the subscribed publication objects don't overlap.
+
+
+
+ Each subscription will receive changes via one replication slot (see
+ ).
+
+
+
+ Subscriptions are not dumped by pg_dump by default but
+ can be requested using the command-line option
+ --include-subscriptions.
+
+
+
+ The subscription is added using and
+ can be stopped/resumed at any time using the
+ command and removed using
+ .
+
+
+
+ When a subscription is dropped and recreated, the synchronization
+ information is lost. This means that the data has to be resynchronized
+ afterwards.
+
+
+
+ The schema definitions are not replicated and the published tables must
+ exist on the subscriber for replication to work. Only regular tables may be
+ the target of replication. For example, you can't replicate to a view.
+
+
+
+ The tables are matched between the publisher and the subscriber using the
+ fully qualified table name. Replication to differently-named tables on the
+ subscriber is not supported.
+
+
+
+ Columns of a table are also matched by name. A different order of columns
+ in the target table is allowed, but the column types have to match.
+
+
+
+
+ Conflicts
+
+
+ Logical replication behaves similarly to normal DML operations in that
+ the data will be updated even if it was changed locally on the subscriber
+ node. If the incoming data violates any constraints the replication will
+ stop. This is referred to as a conflict. When
+ replicating UPDATE or DELETE
+ operations, missing data will not produce a conflict and such operations
+ will simply be skipped.
+
+
+
+ A conflict will produce an error and will stop the replication; it must be
+ resolved manually by the user. Details about the conflict can be found in
+ the subscriber's server log.
+
+
+
+ The resolution can be done either by changing data on the subscriber so
+ that it does not conflict with the incoming change or by skipping the
+ transaction that conflicts with the existing data. The transaction can be
+ skipped by calling the
+ pg_replication_origin_advance() function with
+ a node_name corresponding to the subscription name.
+ The current position of origins can be seen in the
+
+ pg_replication_origin_status system view.
+
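+
+ As a minimal sketch (the subscription name and LSN are hypothetical; the
+ LSN would be taken from the error in the subscriber's server log), the
+ conflicting transaction can be skipped with:
+
+SELECT pg_replication_origin_advance('mysub', '0/1623D70'::pg_lsn);
+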
+
+
+
+ Architecture
+
+
+ Logical replication starts by copying a snapshot of the data on the
+ publisher database. Once that is done, changes on the publisher are sent
+ to the subscriber as they occur in real time. The subscriber applies data
+ in the order in which commits were made on the publisher so that
+ transactional consistency is guaranteed for the publications within any
+ single subscription.
+
+
+
+ Logical replication is built with an architecture similar to physical
+ streaming replication (see ). It is
+ implemented by walsender and the apply
+ processes. The walsender starts logical decoding (described
+ in ) of the WAL and loads the standard
+ logical decoding plugin (pgoutput). The plugin transforms the changes read
+ from WAL to the logical replication protocol
+ (see ) and filters the data
+ according to the publication specification. The data is then continuously
+ transferred using the streaming replication protocol to the apply worker,
+ which maps the data to local tables and applies the individual changes as
+ they are received in exact transactional order.
+
+
+
+ The apply process on the subscriber database always runs with
+ session_replication_role set
+ to replica, which produces the usual effects on triggers
+ and constraints.
+
+
+
+
+ Monitoring
+
+
+ Because logical replication is based on an architecture similar to
+ physical streaming replication,
+ the monitoring on a publication node is very similar to monitoring of a
+ physical replication master
+ (see ).
+
+
+
+ The monitoring information about subscriptions is visible in
+ pg_stat_subscription.
+ This view contains one row for every subscription worker. A subscription
+ can have zero or more active subscription workers depending on its state.
+
+
+
+ Normally, there is a single apply process running for an enabled
+ subscription. A disabled subscription or a crashed subscription will have
+ zero rows in this view.
+
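+
+ For example, the following query (a sketch using the columns documented
+ for this view) shows the apply workers and their progress:
+
+SELECT subname, pid, received_lsn, latest_end_lsn FROM pg_stat_subscription;
+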
+
+
+
+ Security
+
+
+ Logical replication connections occur in the same way as physical streaming
+ replication. They require access to be specifically granted using
+ pg_hba.conf. The role used for the replication
+ connection must have the REPLICATION attribute. This
+ gives a role access to both logical and physical replication.
+
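+
+ For example (a sketch; the role name and password are hypothetical), a
+ role suitable for replication connections could be created with:
+
+CREATE ROLE repuser WITH REPLICATION LOGIN PASSWORD 'secret';
+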
+
+
+ To create a publication, the user must have the CREATE
+ privilege in the database.
+
+
+
+ To create a subscription, the user must be a superuser.
+
+
+
+ The subscription apply process will run in the local database with the
+ privileges of a superuser.
+
+
+
+ Privileges are only checked once at the start of a replication connection.
+ They are not re-checked as each change record is read from the publisher,
+ nor are they re-checked for each change when applied.
+
+
+
+
+ Configuration Settings
+
+
+ Logical replication requires several configuration options to be set.
+
+
+
+ On the publisher side, wal_level must be set to
+ logical, and max_replication_slots
+ has to be set to at least the number of subscriptions expected to connect.
+ max_wal_senders should also be set to at least the same
+ as max_replication_slots plus the number of physical replicas
+ that are connected at the same time.
+
+
+
+ The subscriber also requires max_replication_slots
+ to be set. In this case it should be set to at least the number of
+ subscriptions that will be added to the subscriber.
+ max_logical_replication_workers has to be set to at
+ least the number of subscriptions. Additionally,
+ max_worker_processes may need to be adjusted to
+ accommodate the replication workers, at least
+ (max_logical_replication_workers
+ + 1). Note that some extensions and parallel queries
+ also take worker slots from max_worker_processes.
+
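+
+ As a minimal sketch, assuming a single subscription and no other
+ replication clients (the values are illustrative, and changing these
+ settings requires a server restart), the above could be applied with:
+
+-- on the publisher
+ALTER SYSTEM SET wal_level = logical;
+ALTER SYSTEM SET max_replication_slots = 1;
+ALTER SYSTEM SET max_wal_senders = 1;
+
+-- on the subscriber
+ALTER SYSTEM SET max_replication_slots = 1;
+ALTER SYSTEM SET max_logical_replication_workers = 1;
+ALTER SYSTEM SET max_worker_processes = 2;  -- at least workers + 1
+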
+
+
+
+ Quick Setup
+
+
+ First set the configuration options in postgresql.conf:
+
+wal_level = logical
+
+ The other required settings have default values that are sufficient for a
+ basic setup.
+
+
+
+ pg_hba.conf needs to be adjusted to allow replication
+ (the values here depend on your actual network configuration and the user
+ you want to use for connecting):
+
+host replication repuser 0.0.0.0/0 md5
+
+
+
+
+ Then on the publisher database:
+
+CREATE PUBLICATION mypub FOR TABLE users, departments;
+
+
+
+
+ And on the subscriber database:
+
+CREATE SUBSCRIPTION mysub CONNECTION 'dbname=foo host=bar user=repuser' PUBLICATION mypub;
+
+
+
+
+ The above will start the replication process of changes to the
+ users and departments tables.
+
+
+
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 1545f03656..01fad3870f 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -308,6 +308,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
+
+ pg_stat_subscription>pg_stat_subscription
+ At least one row per subscription, showing information about
+ the subscription workers.
+ See for details.
+
+
+
pg_stat_ssl>pg_stat_sslOne row per connection (regular and replication), showing information about
@@ -1545,6 +1553,72 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
connected server.
+
+ pg_stat_subscription View
+
+
+
+ Column
+ Type
+ Description
+
+
+
+
+
+ subid>
+ oid>
+ OID of the subscription
+
+
+ subname>
+ text>
+ Name of the subscription
+
+
+ pid>
+ integer>
+ Process ID of the subscription worker process
+
+
+ received_lsn>
+ pg_lsn>
+ Last transaction log position received, the initial value of
+ this field being 0
+
+
+ last_msg_send_time>
+ timestamp with time zone>
+ Send time of last message received from origin WAL sender
+
+
+ last_msg_receipt_time>
+ timestamp with time zone>
+ Receipt time of last message received from origin WAL sender
+
+
+
+ latest_end_lsn>
+ pg_lsn>
+ Last transaction log position reported to origin WAL sender
+
+
+
+ latest_end_time>
+ timestamp with time zone>
+ Time of last transaction log position reported to origin WAL
+ sender
+
+
+
+
+
+
+ The pg_stat_subscription view will contain one
+ row per subscription for the main worker (with null PID if the worker is
+ not running).
+
+
pg_stat_ssl View
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index 9143917c49..4e169d1b18 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -160,6 +160,7 @@
&monitoring;
&diskusage;
&wal;
+ &logical-replication;
®ress;
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 9ba147cae5..5f89db5857 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -2122,6 +2122,119 @@ The commands accepted in walsender mode are:
+
+ Logical Streaming Replication Protocol
+
+
+ This section describes the logical replication protocol, which is the message
+ flow started by the START_REPLICATION
+ SLOT slot_name
+ LOGICAL replication command.
+
+
+
+ The logical streaming replication protocol builds on the primitives of
+ the physical streaming replication protocol.
+
+
+
+ Logical Streaming Replication Parameters
+
+
+ The logical replication START_REPLICATION command
+ accepts the following parameters:
+
+
+
+
+ proto_version
+
+
+
+ Protocol version. Currently only version 1 is
+ supported.
+
+
+
+
+
+
+ publication_names
+
+
+
+ Comma-separated list of publication names for which to subscribe
+ (receive changes). The individual publication names are treated
+ as standard object names and can be quoted as needed.
+
+
+
+
+
+
+
+
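+
+ As an illustrative sketch (the slot name, publication name, and starting
+ LSN are hypothetical), a client already connected in walsender mode could
+ start the logical stream with:
+
+START_REPLICATION SLOT "mysub" LOGICAL 0/0 (proto_version '1', publication_names '"mypub"')
+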
+
+ Logical Replication Protocol Messages
+
+
+ The individual protocol messages are discussed in the following
+ subsections. Individual messages are described in
+ section.
+
+
+
+ All top-level protocol messages begin with a message type byte.
+ While represented in code as a character, this is a signed byte with no
+ associated encoding.
+
+
+
+ Since the streaming replication protocol supplies a message length there
+ is no need for top-level protocol messages to embed a length in their
+ header.
+
+
+
+
+
+ Logical Replication Protocol Message Flow
+
+
+ With the exception of the START_REPLICATION command and
+ the replay progress messages, all information flows only from the backend
+ to the frontend.
+
+
+
+ The logical replication protocol sends individual transactions one by one.
+ This means that all messages between a pair of Begin and Commit messages
+ belong to the same transaction.
+
+
+
+ Every sent transaction contains zero or more DML messages (Insert,
+ Update, Delete). In case of a cascaded setup it can also contain Origin
+ messages. The Origin message indicates that the transaction originated on a
+ different replication node. Since a replication node in the scope of the
+ logical replication protocol can be pretty much anything, the only identifier
+ is the origin name. It is the downstream's responsibility to handle this as
+ needed (if needed). The Origin message is always sent before any DML
+ messages in the transaction.
+
+
+
+ Every DML message contains an arbitrary relation ID, which can be mapped to
+ an ID in the Relation messages. The Relation messages describe the schema of the
+ given relation. The Relation message is sent for a given relation either
+ because it is the first time we send a DML message for a given relation in the
+ current session or because the relation definition has changed since the
+ last Relation message was sent for it. The protocol assumes that the client
+ is capable of caching the metadata for as many relations as needed.
+
+
+
+
Message Data Types
@@ -5149,6 +5262,614 @@ not line breaks.
+
+Logical Replication Message Formats
+
+
+This section describes the detailed format of each logical replication message.
+These messages are returned either by the replication slot SQL interface or are
+sent by a walsender. In case of a walsender they are encapsulated inside the replication
+protocol WAL messages as described in
+and generally obey the same message flow as physical replication.
+
+
+
+
+
+
+Begin
+
+
+
+
+
+
+
+ Byte1('B')
+
+
+
+ Identifies the message as a begin message.
+
+
+
+
+
+ Int64
+
+
+
+ The final LSN of the transaction.
+
+
+
+
+
+ Int64
+
+
+
+ Commit timestamp of the transaction. The value is in number
+ of microseconds since PostgreSQL epoch (2000-01-01).
+
+
+
+
+
+ Int32
+
+
+
+ Xid of the transaction.
+
+
+
+
+
+
+
+
+
+
+
+Commit
+
+
+
+
+
+
+
+ Byte1('C')
+
+
+
+ Identifies the message as a commit message.
+
+
+
+
+
+ Int64
+
+
+
+ The LSN of the commit.
+
+
+
+
+
+ Int64
+
+
+
+ The end LSN of the transaction.
+
+
+
+
+
+ Int64
+
+
+
+ Commit timestamp of the transaction. The value is in number
+ of microseconds since PostgreSQL epoch (2000-01-01).
+
+
+
+
+
+
+
+
+
+
+
+Origin
+
+
+
+
+
+
+
+ Byte1('O')
+
+
+
+ Identifies the message as an origin message.
+
+
+
+
+
+ Int64
+
+
+
+ The LSN of the commit on the origin server.
+
+
+
+
+
+ String
+
+
+
+ Name of the origin.
+
+
+
+
+
+
+
+
+ Note that there can be multiple Origin messages inside a single transaction.
+
+
+
+
+
+
+
+Relation
+
+
+
+
+
+
+
+ Byte1('R')
+
+
+
+ Identifies the message as a relation message.
+
+
+
+
+
+ Int32
+
+
+
+ ID of the relation.
+
+
+
+
+
+ String
+
+
+
+ Namespace (empty string for pg_catalog).
+
+
+
+
+
+ String
+
+
+
+ Relation name.
+
+
+
+
+
+
+ Int8
+
+
+
+ Replica identity setting for the relation (same as
+ relreplident in pg_class).
+
+
+
+
+
+
+ Int16
+
+
+
+ Number of columns.
+
+
+
+
+ Next, the following message part appears for each column:
+
+
+
+ Int8
+
+
+
+ Flags for the column. Currently it can be either 0 for no flags
+ or 1, which marks the column as part of the key.
+
+
+
+
+
+ String
+
+
+
+ Name of the column.
+
+
+
+
+
+
+
+
+
+
+
+Insert
+
+
+
+
+
+
+
+ Byte1('I')
+
+
+
+ Identifies the message as an insert message.
+
+
+
+
+
+ Int32
+
+
+
+ ID of the relation corresponding to the ID in the relation
+ message.
+
+
+
+
+
+ Byte1('N')
+
+
+
+ Identifies the following TupleData message as a new tuple.
+
+
+
+
+
+
+ TupleData
+
+
+
+ TupleData message part representing the contents of the new tuple.
+
+
+
+
+
+
+
+
+
+
+
+Update
+
+
+
+
+
+
+
+ Byte1('U')
+
+
+
+ Identifies the message as an update message.
+
+
+
+
+
+ Int32
+
+
+
+ ID of the relation corresponding to the ID in the relation
+ message.
+
+
+
+
+
+
+ Byte1('K')
+
+
+
+ Identifies the following TupleData submessage as a key.
+ This field is optional and is only present if
+ the update changed data in any of the column(s) that are
+ part of the REPLICA IDENTITY index.
+
+
+
+
+
+
+ Byte1('O')
+
+
+
+ Identifies the following TupleData submessage as an old tuple.
+ This field is optional and is only present if the table in which
+ the update happened has REPLICA IDENTITY set to FULL.
+
+
+
+
+
+
+ TupleData
+
+
+
+ TupleData message part representing the contents of the old tuple
+ or primary key. Only present if the previous 'O' or 'K' part
+ is present.
+
+
+
+
+
+
+ Byte1('N')
+
+
+
+ Identifies the following TupleData message as a new tuple.
+
+
+
+
+
+
+ TupleData
+
+
+
+ TupleData message part representing the contents of a new tuple.
+
+
+
+
+
+
+
+
+ The Update message may contain either a 'K' message part or an 'O' message part
+ or neither of them, but never both of them.
+
+
+
+
+
+
+
+Delete
+
+
+
+
+
+
+
+ Byte1('D')
+
+
+
+ Identifies the message as a delete message.
+
+
+
+
+
+ Int32
+
+
+
+ ID of the relation corresponding to the ID in the relation
+ message.
+
+
+
+
+
+
+ Byte1('K')
+
+
+
+ Identifies the following TupleData submessage as a key.
+ This field is present if the table in which the delete has
+ happened uses an index as REPLICA IDENTITY.
+
+
+
+
+
+
+ Byte1('O')
+
+
+
+ Identifies the following TupleData message as an old tuple.
+ This field is present if the table in which the delete has
+ happened has REPLICA IDENTITY set to FULL.
+
+
+
+
+
+
+ TupleData
+
+
+
+ TupleData message part representing the contents of the old tuple
+ or primary key, depending on the previous field.
+
+
+
+
+
+
+
+ The Delete message may contain either a 'K' message part or an 'O' message part,
+ but never both of them.
+
+
+
+
+
+
+
+
+
+The following message parts are shared by the above messages.
+
+
+
+
+
+
+
+TupleData
+
+
+
+
+
+
+
+ Int16
+
+
+
+ Number of columns.
+
+
+
+
+ Next, one of the following submessages appears for each column:
+
+
+
+ Byte1('n')
+
+
+
+ Identifies the data as a NULL value.
+
+
+
+
+ Or
+
+
+
+ Byte1('u')
+
+
+
+ Identifies an unchanged TOASTed value (the actual value is not
+ sent).
+
+
+
+
+ Or
+
+
+
+ Byte1('t')
+
+
+
+ Identifies the data as a text-formatted value.
+
+
+
+
+
+ Int32
+
+
+
+ Length of the column value.
+
+
+
+
+
+ String
+
+
+
+ The text value.
+
+
+
+
+
+
+
+
+
+
+
+
+
Summary of Changes since Protocol 2.0
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index 77667bdebd..0d09f81ccc 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -26,11 +26,13 @@ Complete list of usable sgml source files in this directory.
+
+
@@ -72,11 +74,13 @@ Complete list of usable sgml source files in this directory.
+
+
@@ -116,11 +120,13 @@ Complete list of usable sgml source files in this directory.
+
+
diff --git a/doc/src/sgml/ref/alter_publication.sgml b/doc/src/sgml/ref/alter_publication.sgml
new file mode 100644
index 0000000000..47d83b80be
--- /dev/null
+++ b/doc/src/sgml/ref/alter_publication.sgml
@@ -0,0 +1,139 @@
+
+
+
+
+ ALTER PUBLICATION
+
+
+
+ ALTER PUBLICATION
+ 7
+ SQL - Language Statements
+
+
+
+ ALTER PUBLICATION
+ change the definition of a publication
+
+
+
+
+ALTER PUBLICATION name WITH ( option [, ... ] )
+
+where option can be:
+
+ PUBLISH INSERT | NOPUBLISH INSERT
+ | PUBLISH UPDATE | NOPUBLISH UPDATE
+ | PUBLISH DELETE | NOPUBLISH DELETE
+
+ALTER PUBLICATION name OWNER TO { new_owner | CURRENT_USER | SESSION_USER }
+ALTER PUBLICATION name ADD TABLE table_name [, ...]
+ALTER PUBLICATION name SET TABLE table_name [, ...]
+ALTER PUBLICATION name DROP TABLE table_name [, ...]
+
+
+
+
+ Description
+
+
+ The first variant of this command listed in the synopsis can change
+ all of the publication properties specified in
+ . Properties not mentioned in the
+ command retain their previous settings. Database superusers can change any
+ of these settings for any publication.
+
+
+
+ To alter the owner, you must also be a direct or indirect member of the
+ new owning role. The new owner has to be a superuser.
+
+
+
+ The other variants of this command deal with the table membership of the
+ publication. The SET TABLE clause will replace the
+ list of tables in the publication with the specified one.
+ The ADD TABLE and
+ DROP TABLE clauses will add and remove one or more tables from
+ the publication.
+
+
+
+
+ Parameters
+
+
+
+ name
+
+
+ The name of an existing publication whose definition is to be altered.
+
+
+
+
+
+ PUBLISH INSERT
+ NOPUBLISH INSERT
+ PUBLISH UPDATE
+ NOPUBLISH UPDATE
+ PUBLISH DELETE
+ NOPUBLISH DELETE
+
+
+ These clauses alter properties originally set by
+ . See there for more information.
+
+
+
+
+
+ table_name
+
+
+ Name of an existing table.
+
+
+
+
+
+
+
+ Examples
+
+
+ Change the publication to not publish inserts:
+
+ALTER PUBLICATION noinsert WITH (NOPUBLISH INSERT);
+
+
+
+
+ Add some tables to the publication:
+
+ALTER PUBLICATION mypublication ADD TABLE users, departments;
+
+
+
+
+
+ Compatibility
+
+
+ ALTER PUBLICATION is a PostgreSQL>
+ extension.
+
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml
new file mode 100644
index 0000000000..032ecbb885
--- /dev/null
+++ b/doc/src/sgml/ref/alter_subscription.sgml
@@ -0,0 +1,139 @@
+
+
+
+
+ ALTER SUBSCRIPTION
+
+
+
+ ALTER SUBSCRIPTION
+ 7
+ SQL - Language Statements
+
+
+
+ ALTER SUBSCRIPTION
+ change the definition of a subscription
+
+
+
+
+ALTER SUBSCRIPTION name WITH ( option [, ... ] )
+
+where option can be:
+
+ SLOT NAME = slot_name
+
+ALTER SUBSCRIPTION name OWNER TO { new_owner | CURRENT_USER | SESSION_USER }
+ALTER SUBSCRIPTION name CONNECTION 'conninfo'
+ALTER SUBSCRIPTION name SET PUBLICATION publication_name [, ...]
+ALTER SUBSCRIPTION name ENABLE
+ALTER SUBSCRIPTION name DISABLE
+
+
+
+
+ Description
+
+
+ ALTER SUBSCRIPTION can change most of the subscription
+ properties that can be specified
+ in .
+
+
+
+ To alter the owner, you must also be a direct or indirect member of the
+ new owning role. The new owner has to be a superuser.
+
+
+
+
+ Parameters
+
+
+
+ name
+
+
+ The name of a subscription whose properties are to be altered.
+
+
+
+
+
+ CONNECTION 'conninfo'
+ SET PUBLICATION publication_name
+ SLOT NAME = slot_name
+
+
+ These clauses alter properties originally set by
+ . See there for more
+ information.
+
+
+
+
+
+ ENABLE
+
+
+ Enables the previously disabled subscription, starting the logical
+ replication worker at the end of the transaction.
+
+
+
+
+
+ DISABLE
+
+
+ Disables the running subscription, stopping the logical replication
+ worker at the end of the transaction.
+
+
+
+
+
+
+
+ Examples
+
+
+ Change the publication subscribed by a subscription to
+ insert_only:
+
+ALTER SUBSCRIPTION mysub SET PUBLICATION insert_only;
+
+
+
+
+ Disable (stop) the subscription:
+
+ALTER SUBSCRIPTION mysub DISABLE;
+
+
+
+
+
+ Compatibility
+
+
+ ALTER SUBSCRIPTION is a PostgreSQL>
+ extension.
+
+
+
+
+ See Also
+
+
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
new file mode 100644
index 0000000000..995f2bcf3c
--- /dev/null
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -0,0 +1,206 @@
+
+
+
+
+ CREATE PUBLICATION
+
+
+
+ CREATE PUBLICATION
+ 7
+ SQL - Language Statements
+
+
+
+ CREATE PUBLICATION
+ define a new publication
+
+
+
+
+CREATE PUBLICATION name
+ [ FOR TABLE table_name [, ...]
+ | FOR ALL TABLES ]
+ [ WITH ( option [, ... ] ) ]
+
+where option can be:
+
+ PUBLISH INSERT | NOPUBLISH INSERT
+ | PUBLISH UPDATE | NOPUBLISH UPDATE
+ | PUBLISH DELETE | NOPUBLISH DELETE
+
+
+
+
+ Description
+
+
+ CREATE PUBLICATION adds a new publication
+ in the current database. The publication name must be distinct from
+ the name of any existing publication in the current database.
+
+
+
+ A publication is essentially a group of tables whose data changes are
+ intended to be replicated through logical replication. See
+ for details about how
+ publications fit into the logical replication setup.
+
+
+
+
+ Parameters
+
+
+
+ name
+
+
+ The name of the new publication.
+
+
+
+
+
+ FOR TABLE
+
+
+ Specifies a list of tables to add to the publication.
+
+
+
+
+
+ FOR ALL TABLES
+
+
+ Marks the publication as one that replicates changes for all tables in
+ the database, including tables created in the future.
+
+
+
+
+
+ PUBLISH INSERT
+ NOPUBLISH INSERT
+
+
+ These clauses determine whether the new publication will send
+ the INSERT operations to the subscribers.
+ PUBLISH INSERT is the default.
+
+
+
+
+
+ PUBLISH UPDATE
+ NOPUBLISH UPDATE
+
+
+ These clauses determine whether the new publication will send
+ the UPDATE operations to the subscribers.
+ PUBLISH UPDATE is the default.
+
+
+
+
+
+ PUBLISH DELETE
+ NOPUBLISH DELETE
+
+
+ These clauses determine whether the new publication will send
+ the DELETE operations to the subscribers.
+ PUBLISH DELETE is the default.
+
+
+
+
+
+
+
+
+ Notes
+
+
+ If neither FOR TABLE nor FOR ALL
+ TABLES is specified, then the publication starts out with an
+ empty set of tables. That is useful if tables are to be added later.
+
+
+
+ The creation of a publication does not start replication. It only defines
+ a grouping and filtering logic for future subscribers.
+
+
+
+ To create a publication, the invoking user must have the
+ CREATE> privilege for the current database.
+ (Of course, superusers bypass this check.)
+
+
+
+ To add a table to a publication, the invoking user must have
+ SELECT privilege on the given table. The
+ FOR ALL TABLES clause requires the invoking user to be a superuser.
+
+
+
+ The tables added to a publication that publishes UPDATE
+ and/or DELETE operations must have
+ REPLICA IDENTITY> defined. Otherwise those operations will be
+ disallowed on those tables.
+
+
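+
+ For example (a sketch; the table name is hypothetical), a table without a
+ suitable index can be given a replica identity before being added to such
+ a publication:
+
+ALTER TABLE users REPLICA IDENTITY FULL;
+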
+
+ For an INSERT ... ON CONFLICT> command, the publication will
+ publish the operation that actually results from the command. So depending
+ on the outcome, it may be published as either INSERT or
+ UPDATE, or it may not be published at all.
+
+
+
+ TRUNCATE and other DDL operations
+ are not published.
+
+
+
+
+ Examples
+
+
+ Create a simple publication that just publishes all DML for tables in it:
+
+CREATE PUBLICATION mypublication;
+
+
+
+
+ Create an insert-only publication:
+
+CREATE PUBLICATION insert_only WITH (NOPUBLISH UPDATE, NOPUBLISH DELETE);
+
+
+
+
+
+ Compatibility
+
+
+ CREATE PUBLICATION is a PostgreSQL>
+ extension.
+
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml
new file mode 100644
index 0000000000..40d08b3440
--- /dev/null
+++ b/doc/src/sgml/ref/create_subscription.sgml
@@ -0,0 +1,176 @@
+
+
+
+
+ CREATE SUBSCRIPTION
+
+
+
+ CREATE SUBSCRIPTION
+ 7
+ SQL - Language Statements
+
+
+
+ CREATE SUBSCRIPTION
+ define a new subscription
+
+
+
+
+CREATE SUBSCRIPTION subscription_name CONNECTION 'conninfo' PUBLICATION { publication_name [, ...] } [ WITH ( option [, ... ] ) ]
+
+where option can be:
+
+ | ENABLED | DISABLED
+ | CREATE SLOT | NOCREATE SLOT
+ | SLOT NAME = slot_name
+
+
+
+
+ Description
+
+
+ CREATE SUBSCRIPTION adds a new subscription for the
+ current database. The subscription name must be distinct from the name of
+ any existing subscription in the database.
+
+
+
+ The subscription represents a replication connection to the publisher. As
+ such, this command not only adds definitions in the local catalogs but
+ also creates a replication slot on the publisher.
+
+
+
+ A logical replication worker will be started to replicate data for the new
+ subscription at the commit of the transaction where this command is run.
+
+
+
+ Additional information about subscriptions and logical replication as a
+ whole is available at and
+ .
+
+
+
+
+
+ Parameters
+
+
+
+ subscription_name
+
+
+ The name of the new subscription.
+
+
+
+
+
+ CONNECTION 'conninfo'
+
+
+ The connection string to the publisher.
+
+
+
+
+
+ PUBLICATION publication_name
+
+
+ Name(s) of the publications on the publisher to subscribe to.
+
+
+
+
+
+ ENABLED
+ DISABLED
+
+
+ Specifies whether the subscription should be actively replicating or
+ whether it should just be set up but not yet started. Note that the
+ replication slot as described above is created in either case.
+ ENABLED is the default.
+
+
+
+
+
+ CREATE SLOT
+ NOCREATE SLOT
+
+
+ Specifies whether the command should create the replication slot on the
+ publisher. CREATE SLOT is the default.
+
+
+
+
+
+ SLOT NAME = slot_name
+
+
+ Name of the replication slot to use. The default behavior is to use
+ subscription_name> for the slot name.
+
+
+
+
+
+
+
+ Examples
+
+
+ Create a subscription to a remote server that replicates tables in
+ the publications mypublication and
+ insert_only and starts replicating immediately on
+ commit:
+
+CREATE SUBSCRIPTION mysub
+ CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb password=foopass'
+ PUBLICATION mypublication, insert_only;
+
+
+
+
+ Create a subscription to a remote server that replicates tables in
+ the insert_only publication and does not start replicating
+ until enabled at a later time.
+
+CREATE SUBSCRIPTION mysub
+ CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb password=foopass'
+ PUBLICATION insert_only
+ WITH (DISABLED);
+
+
+
+
+
+ Compatibility
+
+
+ CREATE SUBSCRIPTION is a PostgreSQL>
+ extension.
+
+
+
+
+ See Also
+
+
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/drop_publication.sgml b/doc/src/sgml/ref/drop_publication.sgml
new file mode 100644
index 0000000000..1a1be579ad
--- /dev/null
+++ b/doc/src/sgml/ref/drop_publication.sgml
@@ -0,0 +1,107 @@
+
+
+
+
+ DROP PUBLICATION
+
+
+
+ DROP PUBLICATION
+ 7
+ SQL - Language Statements
+
+
+
+ DROP PUBLICATION
+ remove a publication
+
+
+
+
+DROP PUBLICATION [ IF EXISTS ] name [, ...] [ CASCADE | RESTRICT ]
+
+
+
+
+ Description
+
+
+ DROP PUBLICATION removes an existing publication from
+ the database.
+
+
+
+ A publication can only be dropped by its owner or a superuser.
+
+
+
+
+ Parameters
+
+
+
+ IF EXISTS
+
+
+ Do not throw an error if the publication does not exist. A notice is issued
+ in this case.
+
+
+
+
+
+ name
+
+
+ The name of an existing publication.
+
+
+
+
+
+ CASCADE
+ RESTRICT
+
+
+
+ These key words do not have any effect, since there are no dependencies
+ on publications.
+
+
+
+
+
+
+
+ Examples
+
+
+ Drop a publication:
+
+DROP PUBLICATION mypublication;
+
+
+
+
+
+
+ Compatibility
+
+
+ DROP PUBLICATION is a PostgreSQL>
+ extension.
+
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/drop_subscription.sgml b/doc/src/sgml/ref/drop_subscription.sgml
new file mode 100644
index 0000000000..9f2fb93275
--- /dev/null
+++ b/doc/src/sgml/ref/drop_subscription.sgml
@@ -0,0 +1,110 @@
+
+
+
+
+ DROP SUBSCRIPTION
+
+
+
+ DROP SUBSCRIPTION
+ 7
+ SQL - Language Statements
+
+
+
+ DROP SUBSCRIPTION
+ remove a subscription
+
+
+
+
+DROP SUBSCRIPTION [ IF EXISTS ] name [ DROP SLOT | NODROP SLOT ]
+
+
+
+
+ Description
+
+
+ DROP SUBSCRIPTION removes a subscription from the
+ database cluster.
+
+
+
+ A subscription can only be dropped by a superuser.
+
+
+
+ The replication worker associated with the subscription will not stop until
+ after the transaction that issued this command has committed.
+
+
+
+
+ Parameters
+
+
+
+ name
+
+
+ The name of a subscription to be dropped.
+
+
+
+
+
+ DROP SLOT
+ NODROP SLOT
+
+
+ Specifies whether to drop the replication slot on the publisher. The
+ default is
+ DROP SLOT.
+
+
+
+ If the publisher is not reachable when the subscription is to be
+ dropped, then it is useful to specify NODROP SLOT.
+ But the replication slot on the publisher will then have to be removed
+ manually.
+
+
+
+
+
+
+
+
+ Examples
+
+
+ Drop a subscription:
+
+DROP SUBSCRIPTION mysub;
+
+
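+
+ Drop a subscription while keeping the replication slot on the publisher
+ (a sketch; the slot must then be removed manually on the publisher, for
+ example with pg_drop_replication_slot()):
+
+DROP SUBSCRIPTION mysub NODROP SLOT;
+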
+
+
+
+
+ Compatibility
+
+
+ DROP SUBSCRIPTION is a PostgreSQL>
+ extension.
+
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml
index b70e7d57e9..a1e03c481d 100644
--- a/doc/src/sgml/ref/pg_dump.sgml
+++ b/doc/src/sgml/ref/pg_dump.sgml
@@ -755,6 +755,15 @@ PostgreSQL documentation
+
+
--include-subscriptions
+
+
+ Include logical replication subscriptions in the dump.
+
+
+
+
--inserts
@@ -789,6 +798,18 @@ PostgreSQL documentation
+
+
--no-create-subscription-slots
+
+
+ When dumping logical replication subscriptions,
+ generate CREATE SUBSCRIPTION commands that do not
+ create the remote replication slot. That way, the dump can be
+ restored without requiring network access to the remote servers.
+
+
+
+
--no-security-labels
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 991573121b..640fe12bbf 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1600,6 +1600,34 @@ testdb=>
+
+ \dRp[+] [ pattern ]
+
+
+ Lists replication publications.
+ If pattern is
+ specified, only those publications whose names match the pattern are
+ listed.
+ If + is appended to the command name, the tables
+ associated with each publication are shown as well.
+
+
+
+
+
+ \dRs[+] [ pattern ]
+
+
+ Lists replication subscriptions.
+ If pattern is
+ specified, only those subscriptions whose names match the pattern are
+ listed.
+ If + is appended to the command name, additional
+ properties of the subscriptions are shown.
+
+
+
+
\dT[S+] [ pattern ]
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index 8acdff1393..34007d3508 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -54,11 +54,13 @@
&alterOperatorClass;
&alterOperatorFamily;
&alterPolicy;
+ &alterPublication;
&alterRole;
&alterRule;
&alterSchema;
&alterSequence;
&alterServer;
+ &alterSubscription;
&alterSystem;
&alterTable;
&alterTableSpace;
@@ -100,11 +102,13 @@
&createOperatorClass;
&createOperatorFamily;
&createPolicy;
+ &createPublication;
&createRole;
&createRule;
&createSchema;
&createSequence;
&createServer;
+ &createSubscription;
&createTable;
&createTableAs;
&createTableSpace;
@@ -144,11 +148,13 @@
&dropOperatorFamily;
&dropOwned;
&dropPolicy;
+ &dropPublication;
&dropRole;
&dropRule;
&dropSchema;
&dropSequence;
&dropServer;
+ &dropSubscription;
&dropTable;
&dropTableSpace;
&dropTSConfig;
diff --git a/src/Makefile b/src/Makefile
index 977f80b469..b490c07138 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -22,6 +22,7 @@ SUBDIRS = \
include \
interfaces \
backend/replication/libpqwalreceiver \
+ backend/replication/pgoutput \
fe_utils \
bin \
pl \
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index f5346f024e..f6f136da3a 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -42,6 +42,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
+#include "replication/logicallauncher.h"
#include "replication/origin.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
@@ -2135,6 +2136,7 @@ CommitTransaction(void)
AtEOXact_HashTables(true);
AtEOXact_PgStat(true);
AtEOXact_Snapshot(true);
+ AtCommit_ApplyLauncher();
pgstat_report_xact_timestamp(0);
CurrentResourceOwner = NULL;
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index cd38c8ab3f..31368585d2 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -14,8 +14,9 @@ OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \
objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \
pg_constraint.o pg_conversion.o \
pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \
- pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \
- pg_type.o storage.o toasting.o
+ pg_operator.o pg_proc.o pg_publication.o pg_range.o \
+ pg_db_role_setting.o pg_shdepend.o pg_subscription.o pg_type.o \
+ storage.o toasting.o
BKIFILES = postgres.bki postgres.description postgres.shdescription
@@ -42,7 +43,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \
pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \
- pg_sequence.h \
+ pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \
toasting.h indexing.h \
)
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index 640632784c..a96bf692df 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -45,6 +45,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "catalog/pg_ts_config.h"
@@ -3390,6 +3391,10 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
gettext_noop("must be owner of event trigger %s"),
/* ACL_KIND_EXTENSION */
gettext_noop("must be owner of extension %s"),
+ /* ACL_KIND_PUBLICATION */
+ gettext_noop("must be owner of publication %s"),
+ /* ACL_KIND_SUBSCRIPTION */
+ gettext_noop("must be owner of subscription %s"),
};
@@ -5071,6 +5076,58 @@ pg_extension_ownercheck(Oid ext_oid, Oid roleid)
return has_privs_of_role(roleid, ownerId);
}
+/*
+ * Ownership check for a publication (specified by OID).
+ */
+bool
+pg_publication_ownercheck(Oid pub_oid, Oid roleid)
+{
+ HeapTuple tuple;
+ Oid ownerId;
+
+ /* Superusers bypass all permission checking. */
+ if (superuser_arg(roleid))
+ return true;
+
+ tuple = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pub_oid));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication with OID %u does not exist", pub_oid)));
+
+ ownerId = ((Form_pg_publication) GETSTRUCT(tuple))->pubowner;
+
+ ReleaseSysCache(tuple);
+
+ return has_privs_of_role(roleid, ownerId);
+}
+
+/*
+ * Ownership check for a subscription (specified by OID).
+ */
+bool
+pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
+{
+ HeapTuple tuple;
+ Oid ownerId;
+
+ /* Superusers bypass all permission checking. */
+ if (superuser_arg(roleid))
+ return true;
+
+ tuple = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(sub_oid));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription with OID %u does not exist", sub_oid)));
+
+ ownerId = ((Form_pg_subscription) GETSTRUCT(tuple))->subowner;
+
+ ReleaseSysCache(tuple);
+
+ return has_privs_of_role(roleid, ownerId);
+}
+
/*
* Check whether specified role has CREATEROLE privilege (or is a superuser)
*
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index c3b4d298ce..11ee536726 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -36,6 +36,7 @@
#include "catalog/pg_shdepend.h"
#include "catalog/pg_shdescription.h"
#include "catalog/pg_shseclabel.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/toasting.h"
#include "miscadmin.h"
@@ -227,7 +228,8 @@ IsSharedRelation(Oid relationId)
relationId == SharedSecLabelRelationId ||
relationId == TableSpaceRelationId ||
relationId == DbRoleSettingRelationId ||
- relationId == ReplicationOriginRelationId)
+ relationId == ReplicationOriginRelationId ||
+ relationId == SubscriptionRelationId)
return true;
/* These are their indexes (see indexing.h) */
if (relationId == AuthIdRolnameIndexId ||
@@ -245,7 +247,9 @@ IsSharedRelation(Oid relationId)
relationId == TablespaceNameIndexId ||
relationId == DbRoleSettingDatidRolidIndexId ||
relationId == ReplicationOriginIdentIndex ||
- relationId == ReplicationOriginNameIndex)
+ relationId == ReplicationOriginNameIndex ||
+ relationId == SubscriptionObjectIndexId ||
+ relationId == SubscriptionNameIndexId)
return true;
/* These are their toast tables and toast indexes (see toasting.h) */
if (relationId == PgShdescriptionToastTable ||
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 359719e450..1c43af6eff 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -48,7 +48,10 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_policy.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_trigger.h"
@@ -64,6 +67,7 @@
#include "commands/extension.h"
#include "commands/policy.h"
#include "commands/proclang.h"
+#include "commands/publicationcmds.h"
#include "commands/schemacmds.h"
#include "commands/seclabel.h"
#include "commands/sequence.h"
@@ -164,6 +168,9 @@ static const Oid object_classes[] = {
ExtensionRelationId, /* OCLASS_EXTENSION */
EventTriggerRelationId, /* OCLASS_EVENT_TRIGGER */
PolicyRelationId, /* OCLASS_POLICY */
+ PublicationRelationId, /* OCLASS_PUBLICATION */
+ PublicationRelRelationId, /* OCLASS_PUBLICATION_REL */
+ SubscriptionRelationId, /* OCLASS_SUBSCRIPTION */
TransformRelationId /* OCLASS_TRANSFORM */
};
@@ -1244,6 +1251,14 @@ doDeletion(const ObjectAddress *object, int flags)
RemovePolicyById(object->objectId);
break;
+ case OCLASS_PUBLICATION:
+ RemovePublicationById(object->objectId);
+ break;
+
+ case OCLASS_PUBLICATION_REL:
+ RemovePublicationRelById(object->objectId);
+ break;
+
case OCLASS_TRANSFORM:
DropTransformById(object->objectId);
break;
@@ -2404,6 +2419,15 @@ getObjectClass(const ObjectAddress *object)
case PolicyRelationId:
return OCLASS_POLICY;
+ case PublicationRelationId:
+ return OCLASS_PUBLICATION;
+
+ case PublicationRelRelationId:
+ return OCLASS_PUBLICATION_REL;
+
+ case SubscriptionRelationId:
+ return OCLASS_SUBSCRIPTION;
+
case TransformRelationId:
return OCLASS_TRANSFORM;
}
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index 2b1808b0f9..44d14ae2b1 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -45,7 +45,10 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_policy.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_trigger.h"
@@ -450,6 +453,30 @@ static const ObjectPropertyType ObjectProperty[] =
Anum_pg_type_typacl,
ACL_KIND_TYPE,
true
+ },
+ {
+ PublicationRelationId,
+ PublicationObjectIndexId,
+ PUBLICATIONOID,
+ PUBLICATIONNAME,
+ Anum_pg_publication_pubname,
+ InvalidAttrNumber,
+ Anum_pg_publication_pubowner,
+ InvalidAttrNumber,
+ -1,
+ true
+ },
+ {
+ SubscriptionRelationId,
+ SubscriptionObjectIndexId,
+ SUBSCRIPTIONOID,
+ SUBSCRIPTIONNAME,
+ Anum_pg_subscription_subname,
+ InvalidAttrNumber,
+ Anum_pg_subscription_subowner,
+ InvalidAttrNumber,
+ -1,
+ true
}
};
@@ -653,6 +680,18 @@ static const struct object_type_map
{
"policy", OBJECT_POLICY
},
+ /* OCLASS_PUBLICATION */
+ {
+ "publication", OBJECT_PUBLICATION
+ },
+ /* OCLASS_PUBLICATION_REL */
+ {
+ "publication relation", OBJECT_PUBLICATION_REL
+ },
+ /* OCLASS_SUBSCRIPTION */
+ {
+ "subscription", OBJECT_SUBSCRIPTION
+ },
/* OCLASS_TRANSFORM */
{
"transform", OBJECT_TRANSFORM
@@ -688,6 +727,9 @@ static ObjectAddress get_object_address_opf_member(ObjectType objtype,
static ObjectAddress get_object_address_usermapping(List *objname,
List *objargs, bool missing_ok);
+static ObjectAddress get_object_address_publication_rel(List *objname,
+ List *objargs, Relation *relation,
+ bool missing_ok);
static ObjectAddress get_object_address_defacl(List *objname, List *objargs,
bool missing_ok);
static const ObjectPropertyType *get_object_property_data(Oid class_id);
@@ -812,6 +854,8 @@ get_object_address(ObjectType objtype, List *objname, List *objargs,
case OBJECT_FOREIGN_SERVER:
case OBJECT_EVENT_TRIGGER:
case OBJECT_ACCESS_METHOD:
+ case OBJECT_PUBLICATION:
+ case OBJECT_SUBSCRIPTION:
address = get_object_address_unqualified(objtype,
objname, missing_ok);
break;
@@ -926,6 +970,10 @@ get_object_address(ObjectType objtype, List *objname, List *objargs,
address = get_object_address_usermapping(objname, objargs,
missing_ok);
break;
+ case OBJECT_PUBLICATION_REL:
+ address = get_object_address_publication_rel(objname, objargs,
+ &relation,
+ missing_ok);
+ break;
case OBJECT_DEFACL:
address = get_object_address_defacl(objname, objargs,
missing_ok);
@@ -1091,6 +1139,12 @@ get_object_address_unqualified(ObjectType objtype,
case OBJECT_EVENT_TRIGGER:
msg = gettext_noop("event trigger name cannot be qualified");
break;
+ case OBJECT_PUBLICATION:
+ msg = gettext_noop("publication name cannot be qualified");
+ break;
+ case OBJECT_SUBSCRIPTION:
+ msg = gettext_noop("subscription name cannot be qualified");
+ break;
default:
elog(ERROR, "unrecognized objtype: %d", (int) objtype);
msg = NULL; /* placate compiler */
@@ -1156,6 +1210,16 @@ get_object_address_unqualified(ObjectType objtype,
address.objectId = get_event_trigger_oid(name, missing_ok);
address.objectSubId = 0;
break;
+ case OBJECT_PUBLICATION:
+ address.classId = PublicationRelationId;
+ address.objectId = get_publication_oid(name, missing_ok);
+ address.objectSubId = 0;
+ break;
+ case OBJECT_SUBSCRIPTION:
+ address.classId = SubscriptionRelationId;
+ address.objectId = get_subscription_oid(name, missing_ok);
+ address.objectSubId = 0;
+ break;
default:
elog(ERROR, "unrecognized objtype: %d", (int) objtype);
/* placate compiler, which doesn't know elog won't return */
@@ -1743,6 +1807,51 @@ get_object_address_usermapping(List *objname, List *objargs, bool missing_ok)
return address;
}
+/*
+ * Find the ObjectAddress for a publication relation. The objname parameter
+ * is the relation name; objargs contains the publication name.
+ */
+static ObjectAddress
+get_object_address_publication_rel(List *objname, List *objargs,
+ Relation *relation, bool missing_ok)
+{
+ ObjectAddress address;
+ char *pubname;
+ Publication *pub;
+
+ ObjectAddressSet(address, PublicationRelRelationId, InvalidOid);
+
+ *relation = relation_openrv_extended(makeRangeVarFromNameList(objname),
+ AccessShareLock, missing_ok);
+ if (!*relation)
+ return address;
+
+ /* fetch publication name from input list */
+ pubname = strVal(linitial(objargs));
+
+ /* Now look up the pg_publication tuple */
+ pub = GetPublicationByName(pubname, missing_ok);
+ if (!pub)
+ return address;
+
+ /* Find the publication relation mapping in syscache. */
+ address.objectId =
+ GetSysCacheOid2(PUBLICATIONRELMAP,
+ ObjectIdGetDatum(RelationGetRelid(*relation)),
+ ObjectIdGetDatum(pub->oid));
+ if (!OidIsValid(address.objectId))
+ {
+ if (!missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication relation \"%s\" in publication \"%s\" does not exist",
+ RelationGetRelationName(*relation), pubname)));
+ return address;
+ }
+
+ return address;
+}
+
/*
* Find the ObjectAddress for a default ACL.
*/
@@ -2002,6 +2111,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
case OBJECT_DOMCONSTRAINT:
case OBJECT_CAST:
case OBJECT_USER_MAPPING:
+ case OBJECT_PUBLICATION_REL:
case OBJECT_DEFACL:
case OBJECT_TRANSFORM:
if (list_length(args) != 1)
@@ -2183,6 +2293,16 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
format_type_be(targettypeid))));
}
break;
+ case OBJECT_PUBLICATION:
+ if (!pg_publication_ownercheck(address.objectId, roleid))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+ NameListToString(objname));
+ break;
+ case OBJECT_SUBSCRIPTION:
+ if (!pg_subscription_ownercheck(address.objectId, roleid))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+ NameListToString(objname));
+ break;
case OBJECT_TRANSFORM:
{
TypeName *typename = (TypeName *) linitial(objname);
@@ -3191,6 +3311,41 @@ getObjectDescription(const ObjectAddress *object)
break;
}
+ case OCLASS_PUBLICATION:
+ {
+ appendStringInfo(&buffer, _("publication %s"),
+ get_publication_name(object->objectId));
+ break;
+ }
+
+ case OCLASS_PUBLICATION_REL:
+ {
+ HeapTuple tup;
+ char *pubname;
+ Form_pg_publication_rel prform;
+
+ tup = SearchSysCache1(PUBLICATIONREL,
+ ObjectIdGetDatum(object->objectId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication table %u",
+ object->objectId);
+
+ prform = (Form_pg_publication_rel) GETSTRUCT(tup);
+ pubname = get_publication_name(prform->prpubid);
+
+ appendStringInfo(&buffer, _("publication table %s in publication %s"),
+ get_rel_name(prform->prrelid), pubname);
+ ReleaseSysCache(tup);
+ break;
+ }
+
+ case OCLASS_SUBSCRIPTION:
+ {
+ appendStringInfo(&buffer, _("subscription %s"),
+ get_subscription_name(object->objectId));
+ break;
+ }
+
default:
appendStringInfo(&buffer, "unrecognized object %u %u %d",
object->classId,
@@ -3677,6 +3832,18 @@ getObjectTypeDescription(const ObjectAddress *object)
appendStringInfoString(&buffer, "access method");
break;
+ case OCLASS_PUBLICATION:
+ appendStringInfoString(&buffer, "publication");
+ break;
+
+ case OCLASS_PUBLICATION_REL:
+ appendStringInfoString(&buffer, "publication table");
+ break;
+
+ case OCLASS_SUBSCRIPTION:
+ appendStringInfoString(&buffer, "subscription");
+ break;
+
default:
appendStringInfo(&buffer, "unrecognized %u", object->classId);
break;
@@ -4648,6 +4815,58 @@ getObjectIdentityParts(const ObjectAddress *object,
}
break;
+ case OCLASS_PUBLICATION:
+ {
+ char *pubname;
+
+ pubname = get_publication_name(object->objectId);
+ appendStringInfoString(&buffer,
+ quote_identifier(pubname));
+ if (objname)
+ *objname = list_make1(pubname);
+ break;
+ }
+
+ case OCLASS_PUBLICATION_REL:
+ {
+ HeapTuple tup;
+ char *pubname;
+ Form_pg_publication_rel prform;
+
+ tup = SearchSysCache1(PUBLICATIONREL,
+ ObjectIdGetDatum(object->objectId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication table %u",
+ object->objectId);
+
+ prform = (Form_pg_publication_rel) GETSTRUCT(tup);
+ pubname = get_publication_name(prform->prpubid);
+
+ appendStringInfo(&buffer, _("publication table %s in publication %s"),
+ get_rel_name(prform->prrelid), pubname);
+
+ if (objname)
+ {
+ getRelationIdentity(&buffer, prform->prrelid, objname);
+ *objargs = list_make1(pubname);
+ }
+
+ ReleaseSysCache(tup);
+ break;
+ }
+
+ case OCLASS_SUBSCRIPTION:
+ {
+ char *subname;
+
+ subname = get_subscription_name(object->objectId);
+ appendStringInfoString(&buffer,
+ quote_identifier(subname));
+ if (objname)
+ *objname = list_make1(subname);
+ break;
+ }
+
default:
appendStringInfo(&buffer, "unrecognized object %u %u %d",
object->classId,
diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
new file mode 100644
index 0000000000..576b7faa04
--- /dev/null
+++ b/src/backend/catalog/pg_publication.c
@@ -0,0 +1,457 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_publication.c
+ * publication C API manipulation
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/catalog/pg_publication.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+#include "catalog/dependency.h"
+#include "catalog/index.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+/*
+ * Check whether the relation can be in the given publication and throw
+ * an appropriate error if not.
+ */
+static void
+check_publication_add_relation(Relation targetrel)
+{
+ /* Must be table */
+ if (RelationGetForm(targetrel)->relkind != RELKIND_RELATION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("\"%s\" is not a table",
+ RelationGetRelationName(targetrel)),
+ errdetail("Only tables can be added to publications.")));
+
+ /* Can't be system table */
+ if (IsCatalogRelation(targetrel))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("\"%s\" is a system table",
+ RelationGetRelationName(targetrel)),
+ errdetail("System tables cannot be added to publications.")));
+
+ /* UNLOGGED and TEMP relations cannot be part of publication. */
+ if (!RelationNeedsWAL(targetrel))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("table \"%s\" cannot be replicated",
+ RelationGetRelationName(targetrel)),
+ errdetail("Temporary and unlogged relations cannot be replicated.")));
+}
+
These checks surface directly as SQL-level errors; a sketch with hypothetical names:

    CREATE TEMP TABLE tmp_t (a int);
    ALTER PUBLICATION mypub ADD TABLE tmp_t;
    -- ERROR:  table "tmp_t" cannot be replicated
    -- DETAIL:  Temporary and unlogged relations cannot be replicated.
+/*
+ * Returns whether the relation represented by the given oid and
+ * Form_pg_class entry is publishable.
+ *
+ * Does the same checks as check_publication_add_relation above, but does
+ * not need the relation to be opened and does not throw errors.
+ */
+static bool
+is_publishable_class(Oid relid, Form_pg_class reltuple)
+{
+ return reltuple->relkind == RELKIND_RELATION &&
+ !IsCatalogClass(relid, reltuple) &&
+ reltuple->relpersistence == RELPERSISTENCE_PERMANENT &&
+ /*
+ * Also exclude any tables created as part of initdb. This mainly
+ * affects the preinstalled information_schema.
+ * Note that IsCatalogClass() only checks for these inside pg_catalog
+ * and toast schemas.
+ */
+ relid >= FirstNormalObjectId;
+}
+
+/*
+ * Insert new publication / relation mapping.
+ */
+ObjectAddress
+publication_add_relation(Oid pubid, Relation targetrel,
+ bool if_not_exists)
+{
+ Relation rel;
+ HeapTuple tup;
+ Datum values[Natts_pg_publication_rel];
+ bool nulls[Natts_pg_publication_rel];
+ Oid relid = RelationGetRelid(targetrel);
+ Oid prrelid;
+ Publication *pub = GetPublication(pubid);
+ ObjectAddress myself,
+ referenced;
+
+ rel = heap_open(PublicationRelRelationId, RowExclusiveLock);
+
+ /*
+ * Check for duplicates.  Note that this does not really prevent
+ * duplicates, it's here just to provide a nicer error message in the
+ * common case.  The real protection is the unique key on the catalog.
+ */
+ if (SearchSysCacheExists2(PUBLICATIONRELMAP, ObjectIdGetDatum(relid),
+ ObjectIdGetDatum(pubid)))
+ {
+ heap_close(rel, RowExclusiveLock);
+
+ if (if_not_exists)
+ return InvalidObjectAddress;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("relation \"%s\" is already member of publication \"%s\"",
+ RelationGetRelationName(targetrel), pub->name)));
+ }
+
+ check_publication_add_relation(targetrel);
+
+ /* Form a tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ values[Anum_pg_publication_rel_prpubid - 1] =
+ ObjectIdGetDatum(pubid);
+ values[Anum_pg_publication_rel_prrelid - 1] =
+ ObjectIdGetDatum(relid);
+
+ tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+ /* Insert tuple into catalog. */
+ prrelid = simple_heap_insert(rel, tup);
+ CatalogUpdateIndexes(rel, tup);
+ heap_freetuple(tup);
+
+ ObjectAddressSet(myself, PublicationRelRelationId, prrelid);
+
+ /* Add dependency on the publication */
+ ObjectAddressSet(referenced, PublicationRelationId, pubid);
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+
+ /* Add dependency on the relation */
+ ObjectAddressSet(referenced, RelationRelationId, relid);
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
+
+ /* Close the table. */
+ heap_close(rel, RowExclusiveLock);
+
+ /* Invalidate relcache so that publication info is rebuilt. */
+ CacheInvalidateRelcache(targetrel);
+
+ return myself;
+}
+
+
+/*
+ * Gets list of publication oids for a relation oid.
+ */
+List *
+GetRelationPublications(Oid relid)
+{
+ List *result = NIL;
+ CatCList *pubrellist;
+ int i;
+
+ /* Find all publications associated with the relation. */
+ pubrellist = SearchSysCacheList1(PUBLICATIONRELMAP,
+ ObjectIdGetDatum(relid));
+ for (i = 0; i < pubrellist->n_members; i++)
+ {
+ HeapTuple tup = &pubrellist->members[i]->tuple;
+ Oid pubid = ((Form_pg_publication_rel) GETSTRUCT(tup))->prpubid;
+
+ result = lappend_oid(result, pubid);
+ }
+
+ ReleaseSysCacheList(pubrellist);
+
+ return result;
+}
+
+/*
+ * Gets list of relation oids for a publication.
+ *
+ * This should only be used for normal publications; FOR ALL TABLES
+ * publications should use GetAllTablesPublicationRelations().
+ */
+List *
+GetPublicationRelations(Oid pubid)
+{
+ List *result;
+ Relation pubrelsrel;
+ ScanKeyData scankey;
+ SysScanDesc scan;
+ HeapTuple tup;
+
+ /* Find all relations associated with the publication. */
+ pubrelsrel = heap_open(PublicationRelRelationId, AccessShareLock);
+
+ ScanKeyInit(&scankey,
+ Anum_pg_publication_rel_prpubid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(pubid));
+
+ scan = systable_beginscan(pubrelsrel, PublicationRelMapIndexId, true,
+ NULL, 1, &scankey);
+
+ result = NIL;
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_publication_rel pubrel;
+
+ pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
+
+ result = lappend_oid(result, pubrel->prrelid);
+ }
+
+ systable_endscan(scan);
+ heap_close(pubrelsrel, AccessShareLock);
+
+ return result;
+}
+
+/*
+ * Gets list of publication oids for publications marked as FOR ALL TABLES.
+ */
+List *
+GetAllTablesPublications(void)
+{
+ List *result;
+ Relation rel;
+ ScanKeyData scankey;
+ SysScanDesc scan;
+ HeapTuple tup;
+
+ /* Find all publications that are marked as for all tables. */
+ rel = heap_open(PublicationRelationId, AccessShareLock);
+
+ ScanKeyInit(&scankey,
+ Anum_pg_publication_puballtables,
+ BTEqualStrategyNumber, F_BOOLEQ,
+ BoolGetDatum(true));
+
+ scan = systable_beginscan(rel, InvalidOid, false,
+ NULL, 1, &scankey);
+
+ result = NIL;
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ result = lappend_oid(result, HeapTupleGetOid(tup));
+
+ systable_endscan(scan);
+ heap_close(rel, AccessShareLock);
+
+ return result;
+}
+
+/*
+ * Gets the list of all relations published by FOR ALL TABLES publication(s).
+ */
+List *
+GetAllTablesPublicationRelations(void)
+{
+ Relation classRel;
+ ScanKeyData key[1];
+ HeapScanDesc scan;
+ HeapTuple tuple;
+ List *result = NIL;
+
+ classRel = heap_open(RelationRelationId, AccessShareLock);
+
+ ScanKeyInit(&key[0],
+ Anum_pg_class_relkind,
+ BTEqualStrategyNumber, F_CHAREQ,
+ CharGetDatum(RELKIND_RELATION));
+
+ scan = heap_beginscan_catalog(classRel, 1, key);
+
+ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ Oid relid = HeapTupleGetOid(tuple);
+ Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
+
+ if (is_publishable_class(relid, relForm))
+ result = lappend_oid(result, relid);
+ }
+
+ heap_endscan(scan);
+ heap_close(classRel, AccessShareLock);
+
+ return result;
+}
+
+/*
+ * Get publication using oid
+ *
+ * The Publication struct and its data are palloc'd here.
+ */
+Publication *
+GetPublication(Oid pubid)
+{
+ HeapTuple tup;
+ Publication *pub;
+ Form_pg_publication pubform;
+
+ tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ pub = (Publication *) palloc(sizeof(Publication));
+ pub->oid = pubid;
+ pub->name = pstrdup(NameStr(pubform->pubname));
+ pub->alltables = pubform->puballtables;
+ pub->pubactions.pubinsert = pubform->pubinsert;
+ pub->pubactions.pubupdate = pubform->pubupdate;
+ pub->pubactions.pubdelete = pubform->pubdelete;
+
+ ReleaseSysCache(tup);
+
+ return pub;
+}
+
+
+/*
+ * Get Publication using name.
+ */
+Publication *
+GetPublicationByName(const char *pubname, bool missing_ok)
+{
+ Oid oid;
+
+ oid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(pubname));
+ if (!OidIsValid(oid))
+ {
+ if (missing_ok)
+ return NULL;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist", pubname)));
+ }
+
+ return GetPublication(oid);
+}
+
+/*
+ * get_publication_oid - given a publication name, look up the OID
+ *
+ * If missing_ok is false, throw an error if name not found. If true, just
+ * return InvalidOid.
+ */
+Oid
+get_publication_oid(const char *pubname, bool missing_ok)
+{
+ Oid oid;
+
+ oid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(pubname));
+ if (!OidIsValid(oid) && !missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist", pubname)));
+ return oid;
+}
+
+/*
+ * get_publication_name - given a publication Oid, look up the name
+ */
+char *
+get_publication_name(Oid pubid)
+{
+ HeapTuple tup;
+ char *pubname;
+ Form_pg_publication pubform;
+
+ tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+ pubname = pstrdup(NameStr(pubform->pubname));
+
+ ReleaseSysCache(tup);
+
+ return pubname;
+}
+
+/*
+ * Returns Oids of tables in a publication.
+ */
+Datum
+pg_get_publication_tables(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ char *pubname = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Publication *publication;
+ List *tables;
+ ListCell **lcp;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /* switch to memory context appropriate for multiple function calls */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ publication = GetPublicationByName(pubname, false);
+ if (publication->alltables)
+ tables = GetAllTablesPublicationRelations();
+ else
+ tables = GetPublicationRelations(publication->oid);
+ lcp = (ListCell **) palloc(sizeof(ListCell *));
+ *lcp = list_head(tables);
+ funcctx->user_fctx = (void *) lcp;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+ lcp = (ListCell **) funcctx->user_fctx;
+
+ while (*lcp != NULL)
+ {
+ Oid relid = lfirst_oid(*lcp);
+
+ *lcp = lnext(*lcp);
+ SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(relid));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
A usage sketch for the set-returning function above (publication name hypothetical); the pg_publication_tables view added in system_views.sql below is built on the same call:

    SELECT relid::regclass FROM pg_get_publication_tables('mypub');
diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c
index fb39a01841..60ed957655 100644
--- a/src/backend/catalog/pg_shdepend.c
+++ b/src/backend/catalog/pg_shdepend.c
@@ -39,6 +39,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_shdepend.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
@@ -53,7 +54,9 @@
#include "commands/extension.h"
#include "commands/policy.h"
#include "commands/proclang.h"
+#include "commands/publicationcmds.h"
#include "commands/schemacmds.h"
+#include "commands/subscriptioncmds.h"
#include "commands/tablecmds.h"
#include "commands/typecmds.h"
#include "storage/lmgr.h"
@@ -1406,6 +1409,14 @@ shdepReassignOwned(List *roleids, Oid newrole)
AlterEventTriggerOwner_oid(sdepForm->objid, newrole);
break;
+ case PublicationRelationId:
+ AlterPublicationOwner_oid(sdepForm->objid, newrole);
+ break;
+
+ case SubscriptionRelationId:
+ AlterSubscriptionOwner_oid(sdepForm->objid, newrole);
+ break;
+
/* Generic alter owner cases */
case CollationRelationId:
case ConversionRelationId:
diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c
new file mode 100644
index 0000000000..c358ef6c9a
--- /dev/null
+++ b/src/backend/catalog/pg_subscription.c
@@ -0,0 +1,207 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_subscription.c
+ * replication subscriptions
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/catalog/pg_subscription.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+
+#include "catalog/pg_type.h"
+#include "catalog/pg_subscription.h"
+
+#include "nodes/makefuncs.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+
+
+static List *textarray_to_stringlist(ArrayType *textarray);
+
+/*
+ * Fetch the subscription from the syscache.
+ */
+Subscription *
+GetSubscription(Oid subid, bool missing_ok)
+{
+ HeapTuple tup;
+ Subscription *sub;
+ Form_pg_subscription subform;
+ Datum datum;
+ bool isnull;
+
+ tup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+ if (!HeapTupleIsValid(tup))
+ {
+ if (missing_ok)
+ return NULL;
+
+ elog(ERROR, "cache lookup failed for subscription %u", subid);
+ }
+
+ subform = (Form_pg_subscription) GETSTRUCT(tup);
+
+ sub = (Subscription *) palloc(sizeof(Subscription));
+ sub->oid = subid;
+ sub->dbid = subform->subdbid;
+ sub->name = pstrdup(NameStr(subform->subname));
+ sub->owner = subform->subowner;
+ sub->enabled = subform->subenabled;
+
+ /* Get conninfo */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+ tup,
+ Anum_pg_subscription_subconninfo,
+ &isnull);
+ Assert(!isnull);
+ sub->conninfo = pstrdup(TextDatumGetCString(datum));
+
+ /* Get slotname */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+ tup,
+ Anum_pg_subscription_subslotname,
+ &isnull);
+ Assert(!isnull);
+ sub->slotname = pstrdup(NameStr(*DatumGetName(datum)));
+
+ /* Get publications */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+ tup,
+ Anum_pg_subscription_subpublications,
+ &isnull);
+ Assert(!isnull);
+ sub->publications = textarray_to_stringlist(DatumGetArrayTypeP(datum));
+
+ ReleaseSysCache(tup);
+
+ return sub;
+}
+
+/*
+ * Return the number of subscriptions defined in the given database.
+ * Used by dropdb() to check whether the database can indeed be dropped.
+ */
+int
+CountDBSubscriptions(Oid dbid)
+{
+ int nsubs = 0;
+ Relation rel;
+ ScanKeyData scankey;
+ SysScanDesc scan;
+ HeapTuple tup;
+
+ rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&scankey,
+ Anum_pg_subscription_subdbid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(dbid));
+
+ scan = systable_beginscan(rel, InvalidOid, false,
+ NULL, 1, &scankey);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ nsubs++;
+
+ systable_endscan(scan);
+
+ heap_close(rel, NoLock);
+
+ return nsubs;
+}
+
+/*
+ * Free memory allocated by subscription struct.
+ */
+void
+FreeSubscription(Subscription *sub)
+{
+ pfree(sub->name);
+ pfree(sub->conninfo);
+ pfree(sub->slotname);
+ list_free_deep(sub->publications);
+ pfree(sub);
+}
+
+/*
+ * get_subscription_oid - given a subscription name, look up the OID
+ *
+ * If missing_ok is false, throw an error if name not found. If true, just
+ * return InvalidOid.
+ */
+Oid
+get_subscription_oid(const char *subname, bool missing_ok)
+{
+ Oid oid;
+
+ oid = GetSysCacheOid2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(subname));
+ if (!OidIsValid(oid) && !missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription \"%s\" does not exist", subname)));
+ return oid;
+}
+
+/*
+ * get_subscription_name - given a subscription OID, look up the name
+ */
+char *
+get_subscription_name(Oid subid)
+{
+ HeapTuple tup;
+ char *subname;
+ Form_pg_subscription subform;
+
+ tup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for subscription %u", subid);
+
+ subform = (Form_pg_subscription) GETSTRUCT(tup);
+ subname = pstrdup(NameStr(subform->subname));
+
+ ReleaseSysCache(tup);
+
+ return subname;
+}
+
+/*
+ * Convert text array to list of strings.
+ *
+ * Note: the resulting list of strings is palloc'd here.
+ */
+static List *
+textarray_to_stringlist(ArrayType *textarray)
+{
+ Datum *elems;
+ int nelems, i;
+ List *res = NIL;
+
+ deconstruct_array(textarray,
+ TEXTOID, -1, false, 'i',
+ &elems, NULL, &nelems);
+
+ if (nelems == 0)
+ return NIL;
+
+ for (i = 0; i < nelems; i++)
+ res = lappend(res, makeString(pstrdup(TextDatumGetCString(elems[i]))));
+
+ return res;
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 07f291b7cd..4dfedf89b6 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -248,6 +248,15 @@ CREATE VIEW pg_stats WITH (security_barrier) AS
REVOKE ALL on pg_statistic FROM public;
+CREATE VIEW pg_publication_tables AS
+ SELECT
+ P.pubname AS pubname,
+ N.nspname AS schemaname,
+ C.relname AS tablename
+ FROM pg_publication P, pg_class C
+ JOIN pg_namespace N ON (N.oid = C.relnamespace)
+ WHERE C.oid IN (SELECT relid FROM pg_get_publication_tables(P.pubname));
+
For example, listing the tables published by one publication (name hypothetical):

    SELECT schemaname, tablename
      FROM pg_publication_tables
     WHERE pubname = 'mypub';
CREATE VIEW pg_locks AS
SELECT * FROM pg_lock_status() AS L;
@@ -708,6 +717,20 @@ CREATE VIEW pg_stat_wal_receiver AS
FROM pg_stat_get_wal_receiver() s
WHERE s.pid IS NOT NULL;
+CREATE VIEW pg_stat_subscription AS
+ SELECT
+ su.oid AS subid,
+ su.subname,
+ st.pid,
+ st.received_lsn,
+ st.last_msg_send_time,
+ st.last_msg_receipt_time,
+ st.latest_end_lsn,
+ st.latest_end_time
+ FROM pg_subscription su
+ LEFT JOIN pg_stat_get_subscription(NULL) st
+ ON (st.subid = su.oid);
+
A monitoring query against the new view might look like this sketch; pid is NULL for subscriptions without a running apply worker, thanks to the LEFT JOIN:

    SELECT subname, pid, received_lsn, latest_end_time
      FROM pg_stat_subscription;
CREATE VIEW pg_stat_ssl AS
SELECT
S.pid,
@@ -866,6 +889,8 @@ CREATE VIEW pg_replication_origin_status AS
REVOKE ALL ON pg_replication_origin_status FROM public;
+REVOKE ALL ON pg_subscription FROM public;
+
--
-- We have a few function definitions in here, too.
-- At some point there might be enough to justify breaking them out into
diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile
index 6b3742c0a0..e0fab38cbe 100644
--- a/src/backend/commands/Makefile
+++ b/src/backend/commands/Makefile
@@ -17,9 +17,9 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
dbcommands.o define.o discard.o dropcmds.o \
event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
- policy.o portalcmds.o prepare.o proclang.o \
- schemacmds.o seclabel.o sequence.o tablecmds.o tablespace.o trigger.o \
- tsearchcmds.o typecmds.o user.o vacuum.o vacuumlazy.o \
- variable.o view.o
+ policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
+ schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
+ tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
+ vacuumlazy.o variable.o view.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c
index 8b6f420909..768fcc82dd 100644
--- a/src/backend/commands/alter.c
+++ b/src/backend/commands/alter.c
@@ -45,7 +45,9 @@
#include "commands/extension.h"
#include "commands/policy.h"
#include "commands/proclang.h"
+#include "commands/publicationcmds.h"
#include "commands/schemacmds.h"
+#include "commands/subscriptioncmds.h"
#include "commands/tablecmds.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
@@ -770,6 +772,14 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
return AlterEventTriggerOwner(strVal(linitial(stmt->object)),
newowner);
+ case OBJECT_PUBLICATION:
+ return AlterPublicationOwner(strVal(linitial(stmt->object)),
+ newowner);
+
+ case OBJECT_SUBSCRIPTION:
+ return AlterSubscriptionOwner(strVal(linitial(stmt->object)),
+ newowner);
+
/* Generic cases */
case OBJECT_AGGREGATE:
case OBJECT_COLLATION:
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 2833f3e846..6ad8fd77b1 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -37,6 +37,7 @@
#include "catalog/pg_authid.h"
#include "catalog/pg_database.h"
#include "catalog/pg_db_role_setting.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
@@ -790,6 +791,7 @@ dropdb(const char *dbname, bool missing_ok)
int npreparedxacts;
int nslots,
nslots_active;
+ int nsubscriptions;
/*
* Look up the target database's OID, and get exclusive lock on it. We
@@ -874,6 +876,21 @@ dropdb(const char *dbname, bool missing_ok)
dbname),
errdetail_busy_db(notherbackends, npreparedxacts)));
+ /*
+ * Check if there are subscriptions defined in the target database.
+ *
+ * We can't drop them automatically because they might be holding
+ * resources in other databases/instances.
+ */
+ if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_IN_USE),
+ errmsg("database \"%s\" is being used by logical replication subscription",
+ dbname),
+ errdetail_plural("There is %d subscription.",
+ "There are %d subscriptions.",
+ nsubscriptions, nsubscriptions)));
+
/*
* Remove the database's tuple from pg_database.
	 */
With this check in place, dropping a database that still has subscriptions fails up front; a sketch of the expected behavior (database name hypothetical):

    DROP DATABASE mydb;
    -- ERROR:  database "mydb" is being used by a logical replication subscription
    -- DETAIL:  There is 1 subscription.
diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c
index 714b5252c7..8da924517b 100644
--- a/src/backend/commands/define.c
+++ b/src/backend/commands/define.c
@@ -319,3 +319,31 @@ defGetTypeLength(DefElem *def)
def->defname, defGetString(def))));
return 0; /* keep compiler quiet */
}
+
+/*
+ * Extract a list of string values (otherwise uninterpreted) from a DefElem.
+ */
+List *
+defGetStringList(DefElem *def)
+{
+ ListCell *cell;
+
+ if (def->arg == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("%s requires a parameter",
+ def->defname)));
+ if (nodeTag(def->arg) != T_List)
+ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg));
+
+ foreach(cell, (List *)def->arg)
+ {
+ Node *str = (Node *) lfirst(cell);
+
+ if (!IsA(str, String))
+ elog(ERROR, "unexpected node type in name list: %d",
+ (int) nodeTag(str));
+ }
+
+ return (List *) def->arg;
+}
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
index 96436c0689..8cfbcf43f7 100644
--- a/src/backend/commands/dropcmds.c
+++ b/src/backend/commands/dropcmds.c
@@ -441,6 +441,10 @@ does_not_exist_skipping(ObjectType objtype, List *objname, List *objargs)
}
}
break;
+ case OBJECT_PUBLICATION:
+ msg = gettext_noop("publication \"%s\" does not exist, skipping");
+ name = NameListToString(objname);
+ break;
default:
elog(ERROR, "unrecognized object type: %d", (int) objtype);
break;
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
index c0061e195e..8125537361 100644
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -106,11 +106,13 @@ static event_trigger_support_data event_trigger_support[] = {
{"OPERATOR CLASS", true},
{"OPERATOR FAMILY", true},
{"POLICY", true},
+ {"PUBLICATION", true},
{"ROLE", false},
{"RULE", true},
{"SCHEMA", true},
{"SEQUENCE", true},
{"SERVER", true},
+ {"SUBSCRIPTION", true},
{"TABLE", true},
{"TABLESPACE", false},
{"TRANSFORM", true},
@@ -1103,9 +1105,12 @@ EventTriggerSupportsObjectType(ObjectType obtype)
case OBJECT_OPERATOR:
case OBJECT_OPFAMILY:
case OBJECT_POLICY:
+ case OBJECT_PUBLICATION:
+ case OBJECT_PUBLICATION_REL:
case OBJECT_RULE:
case OBJECT_SCHEMA:
case OBJECT_SEQUENCE:
+ case OBJECT_SUBSCRIPTION:
case OBJECT_TABCONSTRAINT:
case OBJECT_TABLE:
case OBJECT_TRANSFORM:
@@ -1168,6 +1173,9 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
case OCLASS_EXTENSION:
case OCLASS_POLICY:
case OCLASS_AM:
+ case OCLASS_PUBLICATION:
+ case OCLASS_PUBLICATION_REL:
+ case OCLASS_SUBSCRIPTION:
return true;
}
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
new file mode 100644
index 0000000000..21e523deb0
--- /dev/null
+++ b/src/backend/commands/publicationcmds.c
@@ -0,0 +1,754 @@
+/*-------------------------------------------------------------------------
+ *
+ * publicationcmds.c
+ * publication manipulation
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/commands/publicationcmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
+
+#include "commands/dbcommands.h"
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/publicationcmds.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/fmgroids.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+/* Same as MAXNUMMESSAGES in sinvaladt.c */
+#define MAX_RELCACHE_INVAL_MSGS 4096
+
+static List *OpenTableList(List *tables);
+static void CloseTableList(List *rels);
+static void PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+ AlterPublicationStmt *stmt);
+static void PublicationDropTables(Oid pubid, List *rels, bool missing_ok);
+
+static void
+parse_publication_options(List *options,
+ bool *publish_insert_given,
+ bool *publish_insert,
+ bool *publish_update_given,
+ bool *publish_update,
+ bool *publish_delete_given,
+ bool *publish_delete)
+{
+ ListCell *lc;
+
+ *publish_insert_given = false;
+ *publish_update_given = false;
+ *publish_delete_given = false;
+
+ /* Defaults are true */
+ *publish_insert = true;
+ *publish_update = true;
+ *publish_delete = true;
+
+ /* Parse options */
+ foreach (lc, options)
+ {
+ DefElem *defel = (DefElem *) lfirst(lc);
+
+ if (strcmp(defel->defname, "publish insert") == 0)
+ {
+ if (*publish_insert_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publish_insert_given = true;
+ *publish_insert = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "nopublish insert") == 0)
+ {
+ if (*publish_insert_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publish_insert_given = true;
+ *publish_insert = !defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "publish update") == 0)
+ {
+ if (*publish_update_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publish_update_given = true;
+ *publish_update = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "nopublish update") == 0)
+ {
+ if (*publish_update_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publish_update_given = true;
+ *publish_update = !defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "publish delete") == 0)
+ {
+ if (*publish_delete_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publish_delete_given = true;
+ *publish_delete = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "nopublish delete") == 0)
+ {
+ if (*publish_delete_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publish_delete_given = true;
+ *publish_delete = !defGetBoolean(defel);
+ }
+ else
+ elog(ERROR, "unrecognized option: %s", defel->defname);
+ }
+}
+
The DefElem names recognized above correspond to the WITH options of CREATE/ALTER PUBLICATION; a hedged sketch of the spelling (the grammar itself lives in another hunk of this patch, and the option names here are the assumption):

    -- hypothetical publication; all three actions are published by default
    CREATE PUBLICATION insert_only FOR TABLE mytable
        WITH (NOPUBLISH UPDATE, NOPUBLISH DELETE);
+/*
+ * Create new publication.
+ */
+ObjectAddress
+CreatePublication(CreatePublicationStmt *stmt)
+{
+ Relation rel;
+ ObjectAddress myself;
+ Oid puboid;
+ bool nulls[Natts_pg_publication];
+ Datum values[Natts_pg_publication];
+ HeapTuple tup;
+ bool publish_insert_given;
+ bool publish_update_given;
+ bool publish_delete_given;
+ bool publish_insert;
+ bool publish_update;
+ bool publish_delete;
+ AclResult aclresult;
+
+ /* must have CREATE privilege on database */
+ aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, ACL_KIND_DATABASE,
+ get_database_name(MyDatabaseId));
+
+ /* FOR ALL TABLES requires superuser */
+ if (stmt->for_all_tables && !superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to create FOR ALL TABLES publication"))));
+
+ rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+ /* Check if name is used */
+ puboid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(stmt->pubname));
+ if (OidIsValid(puboid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("publication \"%s\" already exists",
+ stmt->pubname)));
+ }
+
+ /* Form a tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ values[Anum_pg_publication_pubname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->pubname));
+ values[Anum_pg_publication_pubowner - 1] = ObjectIdGetDatum(GetUserId());
+
+ parse_publication_options(stmt->options,
+ &publish_insert_given, &publish_insert,
+ &publish_update_given, &publish_update,
+ &publish_delete_given, &publish_delete);
+
+ values[Anum_pg_publication_puballtables - 1] =
+ BoolGetDatum(stmt->for_all_tables);
+ values[Anum_pg_publication_pubinsert - 1] =
+ BoolGetDatum(publish_insert);
+ values[Anum_pg_publication_pubupdate - 1] =
+ BoolGetDatum(publish_update);
+ values[Anum_pg_publication_pubdelete - 1] =
+ BoolGetDatum(publish_delete);
+
+ tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+ /* Insert tuple into catalog. */
+ puboid = simple_heap_insert(rel, tup);
+ CatalogUpdateIndexes(rel, tup);
+ heap_freetuple(tup);
+
+ ObjectAddressSet(myself, PublicationRelationId, puboid);
+
+ /* Make the changes visible. */
+ CommandCounterIncrement();
+
+ if (stmt->tables)
+ {
+ List *rels;
+
+ Assert(list_length(stmt->tables) > 0);
+
+ rels = OpenTableList(stmt->tables);
+ PublicationAddTables(puboid, rels, true, NULL);
+ CloseTableList(rels);
+ }
+
+ heap_close(rel, RowExclusiveLock);
+
+ InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0);
+
+ return myself;
+}
+
End to end, the function above backs statements like these (names hypothetical):

    CREATE PUBLICATION mypub FOR TABLE users, orders;
    CREATE PUBLICATION alltables FOR ALL TABLES;  -- superuser only
+/*
+ * Change options of a publication.
+ */
+static void
+AlterPublicationOptions(AlterPublicationStmt *stmt, Relation rel,
+ HeapTuple tup)
+{
+ bool nulls[Natts_pg_publication];
+ bool replaces[Natts_pg_publication];
+ Datum values[Natts_pg_publication];
+ bool publish_insert_given;
+ bool publish_update_given;
+ bool publish_delete_given;
+ bool publish_insert;
+ bool publish_update;
+ bool publish_delete;
+ ObjectAddress obj;
+
+ parse_publication_options(stmt->options,
+ &publish_insert_given, &publish_insert,
+ &publish_update_given, &publish_update,
+ &publish_delete_given, &publish_delete);
+
+ /* Everything ok, form a new tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ if (publish_insert_given)
+ {
+ values[Anum_pg_publication_pubinsert - 1] =
+ BoolGetDatum(publish_insert);
+ replaces[Anum_pg_publication_pubinsert - 1] = true;
+ }
+ if (publish_update_given)
+ {
+ values[Anum_pg_publication_pubupdate - 1] =
+ BoolGetDatum(publish_update);
+ replaces[Anum_pg_publication_pubupdate - 1] = true;
+ }
+ if (publish_delete_given)
+ {
+ values[Anum_pg_publication_pubdelete - 1] =
+ BoolGetDatum(publish_delete);
+ replaces[Anum_pg_publication_pubdelete - 1] = true;
+ }
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+ replaces);
+
+ /* Update the catalog. */
+ simple_heap_update(rel, &tup->t_self, tup);
+ CatalogUpdateIndexes(rel, tup);
+
+ CommandCounterIncrement();
+
+ /* Invalidate the relcache. */
+ if (((Form_pg_publication) GETSTRUCT(tup))->puballtables)
+ {
+ CacheInvalidateRelcacheAll();
+ }
+ else
+ {
+ List *relids = GetPublicationRelations(HeapTupleGetOid(tup));
+
+ /*
+ * We don't want to send too many individual messages; at some point
+ * it's cheaper to just reset the whole relcache.
+ */
+ if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS)
+ {
+ ListCell *lc;
+
+ foreach (lc, relids)
+ {
+ Oid relid = lfirst_oid(lc);
+
+ CacheInvalidateRelcacheByRelid(relid);
+ }
+ }
+ else
+ CacheInvalidateRelcacheAll();
+ }
+
+ ObjectAddressSet(obj, PublicationRelationId, HeapTupleGetOid(tup));
+ EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+ (Node *) stmt);
+
+ InvokeObjectPostAlterHook(PublicationRelationId, HeapTupleGetOid(tup), 0);
+}
+
+/*
+ * Add or remove tables to/from a publication.
+ */
+static void
+AlterPublicationTables(AlterPublicationStmt *stmt, Relation rel,
+ HeapTuple tup)
+{
+ Oid pubid = HeapTupleGetOid(tup);
+ List *rels = NIL;
+ Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ /* Check that user is allowed to manipulate the publication tables. */
+ if (pubform->puballtables)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("publication \"%s\" is defined as FOR ALL TABLES",
+ NameStr(pubform->pubname)),
+ errdetail("Tables cannot be added to or dropped from FOR ALL TABLES publications.")));
+
+ Assert(list_length(stmt->tables) > 0);
+
+ rels = OpenTableList(stmt->tables);
+
+ if (stmt->tableAction == DEFELEM_ADD)
+ PublicationAddTables(pubid, rels, false, stmt);
+ else if (stmt->tableAction == DEFELEM_DROP)
+ PublicationDropTables(pubid, rels, false);
+ else /* DEFELEM_SET */
+ {
+ List *oldrelids = GetPublicationRelations(pubid);
+ List *delrels = NIL;
+ ListCell *oldlc;
+
+ /* Calculate which relations to drop. */
+ foreach(oldlc, oldrelids)
+ {
+ Oid oldrelid = lfirst_oid(oldlc);
+ ListCell *newlc;
+ bool found = false;
+
+ foreach(newlc, rels)
+ {
+ Relation newrel = (Relation) lfirst(newlc);
+
+ if (RelationGetRelid(newrel) == oldrelid)
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ Relation oldrel = heap_open(oldrelid,
+ ShareUpdateExclusiveLock);
+ delrels = lappend(delrels, oldrel);
+ }
+ }
+
+ /* And drop them. */
+ PublicationDropTables(pubid, delrels, true);
+
+ /*
+ * Don't bother calculating the difference for adding; we'll catch and
+ * skip existing ones when doing the catalog update.
+ */
+ PublicationAddTables(pubid, rels, true, stmt);
+
+ CloseTableList(delrels);
+ }
+
+ CloseTableList(rels);
+}
+
+/*
+ * Alter the existing publication.
+ *
+ * This is the dispatcher function for AlterPublicationOptions and
+ * AlterPublicationTables.
+ */
+void
+AlterPublication(AlterPublicationStmt *stmt)
+{
+ Relation rel;
+ HeapTuple tup;
+
+ rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(PUBLICATIONNAME,
+ CStringGetDatum(stmt->pubname));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist",
+ stmt->pubname)));
+
+ /* must be owner */
+ if (!pg_publication_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+ stmt->pubname);
+
+ if (stmt->options)
+ AlterPublicationOptions(stmt, rel, tup);
+ else
+ AlterPublicationTables(stmt, rel, tup);
+
+ /* Cleanup. */
+ heap_freetuple(tup);
+ heap_close(rel, RowExclusiveLock);
+}
+
The dispatcher routes option changes and membership changes to the two helpers above; a sketch (names hypothetical, WITH spelling assumed from the grammar hunk):

    ALTER PUBLICATION mypub WITH (NOPUBLISH DELETE);  -- AlterPublicationOptions
    ALTER PUBLICATION mypub ADD TABLE orders;         -- AlterPublicationTables
    ALTER PUBLICATION mypub SET TABLE users, orders;  -- DEFELEM_SET path
+/*
+ * Drop publication by OID
+ */
+void
+RemovePublicationById(Oid pubid)
+{
+ Relation rel;
+ HeapTuple tup;
+
+ rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+ simple_heap_delete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ heap_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Remove relation from publication by mapping OID.
+ */
+void
+RemovePublicationRelById(Oid proid)
+{
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_publication_rel pubrel;
+
+ rel = heap_open(PublicationRelRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(PUBLICATIONREL, ObjectIdGetDatum(proid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication table %u",
+ proid);
+
+ pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
+
+ /* Invalidate relcache so that publication info is rebuilt. */
+ CacheInvalidateRelcacheByRelid(pubrel->prrelid);
+
+ simple_heap_delete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ heap_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Open relations specified by a list of RangeVars.
+ * The returned tables are locked in ShareUpdateExclusiveLock mode.
+ */
+static List *
+OpenTableList(List *tables)
+{
+ List *relids = NIL;
+ List *rels = NIL;
+ ListCell *lc;
+
+ /*
+ * Open, share-lock, and check all the explicitly-specified relations
+ */
+ foreach(lc, tables)
+ {
+ RangeVar *rv = lfirst(lc);
+ Relation rel;
+ bool recurse = rv->inh;
+ Oid myrelid;
+
+ CHECK_FOR_INTERRUPTS();
+
+ rel = heap_openrv(rv, ShareUpdateExclusiveLock);
+ myrelid = RelationGetRelid(rel);
+ /*
+ * Filter out duplicates when the user specifies "foo, foo".
+ * Note that this algorithm is known not to be very efficient (O(N^2)),
+ * but given that it only works on the list of tables given to us by
+ * the user, it's deemed acceptable.
+ */
+ if (list_member_oid(relids, myrelid))
+ {
+ heap_close(rel, ShareUpdateExclusiveLock);
+ continue;
+ }
+ rels = lappend(rels, rel);
+ relids = lappend_oid(relids, myrelid);
+
+ if (recurse)
+ {
+ ListCell *child;
+ List *children;
+
+ children = find_all_inheritors(myrelid, ShareUpdateExclusiveLock,
+ NULL);
+
+ foreach(child, children)
+ {
+ Oid childrelid = lfirst_oid(child);
+
+ /*
+ * Skip duplicates if user specified both parent and child
+ * tables.  (The parent relation is already open and locked,
+ * so there is nothing to close here.)
+ */
+ if (list_member_oid(relids, childrelid))
+ continue;
+
+ /* find_all_inheritors already got lock */
+ rel = heap_open(childrelid, NoLock);
+ rels = lappend(rels, rel);
+ relids = lappend_oid(relids, childrelid);
+ }
+ }
+ }
+
+ list_free(relids);
+
+ return rels;
+}
+
+/*
+ * Close all relations in the list.
+ */
+static void
+CloseTableList(List *rels)
+{
+ ListCell *lc;
+
+ foreach(lc, rels)
+ {
+ Relation rel = (Relation) lfirst(lc);
+
+ heap_close(rel, NoLock);
+ }
+}
+
+/*
+ * Add listed tables to the publication.
+ */
+static void
+PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+ AlterPublicationStmt *stmt)
+{
+ ListCell *lc;
+
+ Assert(!stmt || !stmt->for_all_tables);
+
+ foreach(lc, rels)
+ {
+ Relation rel = (Relation) lfirst(lc);
+ ObjectAddress obj;
+
+ /* Must be owner of the table or superuser. */
+ if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+ RelationGetRelationName(rel));
+
+ obj = publication_add_relation(pubid, rel, if_not_exists);
+ if (stmt)
+ {
+ EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+ (Node *) stmt);
+
+ InvokeObjectPostCreateHook(PublicationRelRelationId,
+ obj.objectId, 0);
+ }
+ }
+}
+
+/*
+ * Remove listed tables from the publication.
+ */
+static void
+PublicationDropTables(Oid pubid, List *rels, bool missing_ok)
+{
+ ObjectAddress obj;
+ ListCell *lc;
+ Oid prid;
+
+ foreach(lc, rels)
+ {
+ Relation rel = (Relation) lfirst(lc);
+ Oid relid = RelationGetRelid(rel);
+
+ prid = GetSysCacheOid2(PUBLICATIONRELMAP, ObjectIdGetDatum(relid),
+ ObjectIdGetDatum(pubid));
+ if (!OidIsValid(prid))
+ {
+ if (missing_ok)
+ continue;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("relation \"%s\" is not part of the publication",
+ RelationGetRelationName(rel))));
+ }
+
+ ObjectAddressSet(obj, PublicationRelRelationId, prid);
+ performDeletion(&obj, DROP_CASCADE, 0);
+ }
+}
+
+/*
+ * Internal workhorse for changing a publication owner
+ */
+static void
+AlterPublicationOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+ Form_pg_publication form;
+
+ form = (Form_pg_publication) GETSTRUCT(tup);
+
+ if (form->pubowner == newOwnerId)
+ return;
+
+ if (!pg_publication_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION,
+ NameStr(form->pubname));
+
+ /* New owner must be a superuser */
+ if (!superuser_arg(newOwnerId))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to change owner of publication \"%s\"",
+ NameStr(form->pubname)),
+ errhint("The owner of a publication must be a superuser.")));
+
+ form->pubowner = newOwnerId;
+ simple_heap_update(rel, &tup->t_self, tup);
+ CatalogUpdateIndexes(rel, tup);
+
+ /* Update owner dependency reference */
+ changeDependencyOnOwner(PublicationRelationId,
+ HeapTupleGetOid(tup),
+ newOwnerId);
+
+ InvokeObjectPostAlterHook(PublicationRelationId,
+ HeapTupleGetOid(tup), 0);
+}
+
+/*
+ * Change publication owner -- by name
+ */
+ObjectAddress
+AlterPublicationOwner(const char *name, Oid newOwnerId)
+{
+ Oid pubid;
+ HeapTuple tup;
+ Relation rel;
+ ObjectAddress address;
+
+ rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(PUBLICATIONNAME, CStringGetDatum(name));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication \"%s\" does not exist", name)));
+
+ pubid = HeapTupleGetOid(tup);
+
+ AlterPublicationOwner_internal(rel, tup, newOwnerId);
+
+ ObjectAddressSet(address, PublicationRelationId, pubid);
+
+ heap_freetuple(tup);
+
+ heap_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Change publication owner -- by OID
+ */
+void
+AlterPublicationOwner_oid(Oid pubid, Oid newOwnerId)
+{
+ HeapTuple tup;
+ Relation rel;
+
+ rel = heap_open(PublicationRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("publication with OID %u does not exist", subid)));
+
+ AlterPublicationOwner_internal(rel, tup, newOwnerId);
+
+ heap_freetuple(tup);
+
+ heap_close(rel, RowExclusiveLock);
+}
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
new file mode 100644
index 0000000000..1448ee3bee
--- /dev/null
+++ b/src/backend/commands/subscriptioncmds.c
@@ -0,0 +1,643 @@
+/*-------------------------------------------------------------------------
+ *
+ * subscriptioncmds.c
+ * subscription catalog manipulation functions
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/commands/subscriptioncmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+
+#include "catalog/indexing.h"
+#include "catalog/objectaccess.h"
+#include "catalog/objectaddress.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_subscription.h"
+
+#include "commands/defrem.h"
+#include "commands/event_trigger.h"
+#include "commands/subscriptioncmds.h"
+
+#include "replication/logicallauncher.h"
+#include "replication/origin.h"
+#include "replication/walreceiver.h"
+#include "replication/worker_internal.h"
+
+#include "storage/lmgr.h"
+
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+/*
+ * Common option parsing function for CREATE and ALTER SUBSCRIPTION commands.
+ *
+ * Since not all options can be specified in both commands, this function
+ * treats an option whose target output pointer is NULL as unrecognized
+ * and reports an error for it.
+ */
+static void
+parse_subscription_options(List *options, char **conninfo,
+ List **publications, bool *enabled_given,
+ bool *enabled, bool *create_slot, char **slot_name)
+{
+ ListCell *lc;
+ bool create_slot_given = false;
+
+ if (conninfo)
+ *conninfo = NULL;
+ if (publications)
+ *publications = NIL;
+ if (enabled)
+ {
+ *enabled_given = false;
+ *enabled = true;
+ }
+ if (create_slot)
+ *create_slot = true;
+ if (slot_name)
+ *slot_name = NULL;
+
+ /* Parse options */
+ foreach (lc, options)
+ {
+ DefElem *defel = (DefElem *) lfirst(lc);
+
+ if (strcmp(defel->defname, "conninfo") == 0 && conninfo)
+ {
+ if (*conninfo)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *conninfo = defGetString(defel);
+ }
+ else if (strcmp(defel->defname, "publication") == 0 && publications)
+ {
+ if (*publications)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *publications = defGetStringList(defel);
+ }
+ else if (strcmp(defel->defname, "enabled") == 0 && enabled)
+ {
+ if (*enabled_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *enabled_given = true;
+ *enabled = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "disabled") == 0 && enabled)
+ {
+ if (*enabled_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *enabled_given = true;
+ *enabled = !defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "create slot") == 0 && create_slot)
+ {
+ if (create_slot_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ create_slot_given = true;
+ *create_slot = defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "nocreate slot") == 0 && create_slot)
+ {
+ if (create_slot_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ create_slot_given = true;
+ *create_slot = !defGetBoolean(defel);
+ }
+ else if (strcmp(defel->defname, "slot name") == 0 && slot_name)
+ {
+ if (*slot_name)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+
+ *slot_name = defGetString(defel);
+ }
+ else
+ elog(ERROR, "unrecognized option: %s", defel->defname);
+ }
+}
+
These option names map onto CREATE SUBSCRIPTION clauses; a sketch assuming the grammar matches the DefElem names above (all names hypothetical):

    CREATE SUBSCRIPTION mysub
        CONNECTION 'host=pub.example.com dbname=src'
        PUBLICATION mypub
        WITH (DISABLED, NOCREATE SLOT, SLOT NAME = 'mysub_slot');
+/*
+ * Auxiliary function to return a text array out of a list of String nodes.
+ */
+static Datum
+publicationListToArray(List *publist)
+{
+ ArrayType *arr;
+ Datum *datums;
+ int j = 0;
+ ListCell *cell;
+ MemoryContext memcxt;
+ MemoryContext oldcxt;
+
+ /* Create memory context for temporary allocations. */
+ memcxt = AllocSetContextCreate(CurrentMemoryContext,
+ "publicationListToArray to array",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ oldcxt = MemoryContextSwitchTo(memcxt);
+
+ datums = palloc(sizeof(text *) * list_length(publist));
+ foreach(cell, publist)
+ {
+ char *name = strVal(lfirst(cell));
+ ListCell *pcell;
+
+ /* Check for duplicates. */
+ foreach(pcell, publist)
+ {
+ char *pname = strVal(lfirst(pcell));
+
+ /* Only compare against earlier list entries. */
+ if (pcell == cell)
+ break;
+
+ if (strcmp(name, pname) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("publication name \"%s\" used more than once",
+ pname)));
+ }
+
+ datums[j++] = CStringGetTextDatum(name);
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+
+ arr = construct_array(datums, list_length(publist),
+ TEXTOID, -1, false, 'i');
+ MemoryContextDelete(memcxt);
+
+ return PointerGetDatum(arr);
+}
+
+/*
+ * Create new subscription.
+ */
+ObjectAddress
+CreateSubscription(CreateSubscriptionStmt *stmt)
+{
+ Relation rel;
+ ObjectAddress myself;
+ Oid subid;
+ bool nulls[Natts_pg_subscription];
+ Datum values[Natts_pg_subscription];
+ HeapTuple tup;
+ bool enabled_given;
+ bool enabled;
+ char *conninfo;
+ char *slotname;
+ char originname[NAMEDATALEN];
+ bool create_slot;
+ List *publications;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to create subscriptions"))));
+
+ rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+ /* Check if name is used */
+ subid = GetSysCacheOid2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(stmt->subname));
+ if (OidIsValid(subid))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("subscription \"%s\" already exists",
+ stmt->subname)));
+ }
+
+ /*
+ * Parse and check options.  The connection string and publication list
+ * come from dedicated clauses, so they must not be specified here.
+ */
+ parse_subscription_options(stmt->options, NULL, NULL,
+ &enabled_given, &enabled,
+ &create_slot, &slotname);
+ if (slotname == NULL)
+ slotname = stmt->subname;
+
+ conninfo = stmt->conninfo;
+ publications = stmt->publication;
+
+ /* Load the library providing us libpq calls. */
+ load_file("libpqwalreceiver", false);
+
+ /* Check the connection info string. */
+ walrcv_check_conninfo(conninfo);
+
+ /* Everything ok, form a new tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+
+ values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId);
+ values[Anum_pg_subscription_subname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->subname));
+ values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(GetUserId());
+ values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled);
+ values[Anum_pg_subscription_subconninfo - 1] =
+ CStringGetTextDatum(conninfo);
+ values[Anum_pg_subscription_subslotname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(slotname));
+ values[Anum_pg_subscription_subpublications - 1] =
+ publicationListToArray(publications);
+
+ tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);
+
+ /* Insert tuple into catalog. */
+ subid = simple_heap_insert(rel, tup);
+ CatalogUpdateIndexes(rel, tup);
+ heap_freetuple(tup);
+
+ snprintf(originname, sizeof(originname), "pg_%u", subid);
+ replorigin_create(originname);
+
+ /*
+ * If requested, create the replication slot on the remote side for our
+ * newly created subscription.
+ */
+ if (create_slot)
+ {
+ XLogRecPtr lsn;
+ char *err;
+ WalReceiverConn *wrconn;
+
+ /* Try to connect to the publisher. */
+ wrconn = walrcv_connect(conninfo, true, stmt->subname, &err);
+ if (!wrconn)
+ ereport(ERROR,
+ (errmsg("could not connect to the publisher: %s", err)));
+
+ walrcv_create_slot(wrconn, slotname, false, &lsn);
+ ereport(NOTICE,
+ (errmsg("created replication slot \"%s\" on publisher",
+ slotname)));
+
+ /* And we are done with the remote side. */
+ walrcv_disconnect(wrconn);
+ }
+
+ heap_close(rel, RowExclusiveLock);
+
+ ApplyLauncherWakeupAtCommit();
+
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
+ InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0);
+
+ return myself;
+}
+
Taken together, a minimal invocation that exercises the remote slot creation path might be (connection string and names hypothetical; the slot name defaults to the subscription name):

    CREATE SUBSCRIPTION mysub
        CONNECTION 'host=pub.example.com port=5432 dbname=src'
        PUBLICATION mypub;
    -- NOTICE:  created replication slot "mysub" on publisher
+/*
+ * Alter the existing subscription.
+ */
+ObjectAddress
+AlterSubscription(AlterSubscriptionStmt *stmt)
+{
+ Relation rel;
+ ObjectAddress myself;
+ bool nulls[Natts_pg_subscription];
+ bool replaces[Natts_pg_subscription];
+ Datum values[Natts_pg_subscription];
+ HeapTuple tup;
+ Oid subid;
+ bool enabled_given;
+ bool enabled;
+ char *conninfo;
+ char *slot_name;
+ List *publications;
+
+ rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+ /* Fetch the existing tuple. */
+ tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(stmt->subname));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription \"%s\" does not exist",
+ stmt->subname)));
+
+ /* must be owner */
+ if (!pg_subscription_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+ stmt->subname);
+
+ subid = HeapTupleGetOid(tup);
+
+ /* Parse options. */
+ parse_subscription_options(stmt->options, &conninfo, &publications,
+ &enabled_given, &enabled,
+ NULL, &slot_name);
+
+ /* Form a new tuple. */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ if (enabled_given)
+ {
+ values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled);
+ replaces[Anum_pg_subscription_subenabled - 1] = true;
+ }
+ if (conninfo)
+ {
+ values[Anum_pg_subscription_subconninfo - 1] =
+ CStringGetTextDatum(conninfo);
+ replaces[Anum_pg_subscription_subconninfo - 1] = true;
+ }
+ if (slot_name)
+ {
+ values[Anum_pg_subscription_subslotname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(slot_name));
+ replaces[Anum_pg_subscription_subslotname - 1] = true;
+ }
+ if (publications != NIL)
+ {
+ values[Anum_pg_subscription_subpublications - 1] =
+ publicationListToArray(publications);
+ replaces[Anum_pg_subscription_subpublications - 1] = true;
+ }
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls,
+ replaces);
+
+ /* Update the catalog. */
+ simple_heap_update(rel, &tup->t_self, tup);
+ CatalogUpdateIndexes(rel, tup);
+
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+
+ /* Cleanup. */
+ heap_freetuple(tup);
+ heap_close(rel, RowExclusiveLock);
+
+ InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0);
+
+ return myself;
+}
+
+/*
+ * Drop a subscription
+ */
+void
+DropSubscription(DropSubscriptionStmt *stmt)
+{
+ Relation rel;
+ ObjectAddress myself;
+ HeapTuple tup;
+ Oid subid;
+ Datum datum;
+ bool isnull;
+ char *subname;
+ char *conninfo;
+ char *slotname;
+ char originname[NAMEDATALEN];
+ char *err = NULL;
+ RepOriginId originid;
+ WalReceiverConn *wrconn = NULL;
+ StringInfoData cmd;
+
+ rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(stmt->subname));
+
+ if (!HeapTupleIsValid(tup))
+ {
+ heap_close(rel, NoLock);
+
+ if (!stmt->missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription \"%s\" does not exist",
+ stmt->subname)));
+ else
+ ereport(NOTICE,
+ (errmsg("subscription \"%s\" does not exist, skipping",
+ stmt->subname)));
+
+ return;
+ }
+
+ subid = HeapTupleGetOid(tup);
+
+ /* must be owner */
+ if (!pg_subscription_ownercheck(subid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+ stmt->subname);
+
+ /* DROP hook for the subscription being removed */
+ InvokeObjectDropHook(SubscriptionRelationId, subid, 0);
+
+ /*
+	 * Lock the subscription so nobody else can do anything with it
+ * (including the replication workers).
+ */
+ LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock);
+
+ /* Get subname */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+ Anum_pg_subscription_subname, &isnull);
+ Assert(!isnull);
+ subname = pstrdup(NameStr(*DatumGetName(datum)));
+
+ /* Get conninfo */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+ Anum_pg_subscription_subconninfo, &isnull);
+ Assert(!isnull);
+ conninfo = pstrdup(TextDatumGetCString(datum));
+
+ /* Get slotname */
+ datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup,
+ Anum_pg_subscription_subslotname, &isnull);
+ Assert(!isnull);
+ slotname = pstrdup(NameStr(*DatumGetName(datum)));
+
+ ObjectAddressSet(myself, SubscriptionRelationId, subid);
+ EventTriggerSQLDropAddObject(&myself, true, true);
+
+ /* Remove the tuple from catalog. */
+ simple_heap_delete(rel, &tup->t_self);
+
+ ReleaseSysCache(tup);
+
+ /* Protect against launcher restarting the worker. */
+ LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
+
+ /* Kill the apply worker so that the slot becomes accessible. */
+ logicalrep_worker_stop(subid);
+
+	/* Remove the origin tracking if it exists. */
+ snprintf(originname, sizeof(originname), "pg_%u", subid);
+ originid = replorigin_by_name(originname, true);
+ if (originid != InvalidRepOriginId)
+ replorigin_drop(originid);
+
+	/* If the user asked not to drop the slot, we are done now. */
+ if (!stmt->drop_slot)
+ {
+ heap_close(rel, NoLock);
+ return;
+ }
+
+ /*
+ * Otherwise drop the replication slot at the publisher node using
+ * the replication connection.
+ */
+ load_file("libpqwalreceiver", false);
+
+ initStringInfo(&cmd);
+ appendStringInfo(&cmd, "DROP_REPLICATION_SLOT \"%s\"", slotname);
+
+ wrconn = walrcv_connect(conninfo, true, subname, &err);
+ if (wrconn == NULL)
+ ereport(ERROR,
+ (errmsg("could not connect to publisher when attempting to "
+ "drop the replication slot \"%s\"", slotname),
+ errdetail("The error was: %s", err)));
+
+ if (!walrcv_command(wrconn, cmd.data, &err))
+ ereport(ERROR,
+				(errmsg("could not drop the replication slot \"%s\" on publisher",
+ slotname),
+ errdetail("The error was: %s", err)));
+ else
+ ereport(NOTICE,
+ (errmsg("dropped replication slot \"%s\" on publisher",
+ slotname)));
+
+ walrcv_disconnect(wrconn);
+
+ pfree(cmd.data);
+
+ heap_close(rel, NoLock);
+}
+
+/*
+ * Internal workhorse for changing a subscription owner
+ */
+static void
+AlterSubscriptionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId)
+{
+ Form_pg_subscription form;
+
+ form = (Form_pg_subscription) GETSTRUCT(tup);
+
+ if (form->subowner == newOwnerId)
+ return;
+
+ if (!pg_subscription_ownercheck(HeapTupleGetOid(tup), GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION,
+ NameStr(form->subname));
+
+ /* New owner must be a superuser */
+ if (!superuser_arg(newOwnerId))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to change owner of subscription \"%s\"",
+ NameStr(form->subname)),
+				 errhint("The owner of a subscription must be a superuser.")));
+
+ form->subowner = newOwnerId;
+ simple_heap_update(rel, &tup->t_self, tup);
+ CatalogUpdateIndexes(rel, tup);
+
+ /* Update owner dependency reference */
+ changeDependencyOnOwner(SubscriptionRelationId,
+ HeapTupleGetOid(tup),
+ newOwnerId);
+
+ InvokeObjectPostAlterHook(SubscriptionRelationId,
+ HeapTupleGetOid(tup), 0);
+}
+
+/*
+ * Change subscription owner -- by name
+ */
+ObjectAddress
+AlterSubscriptionOwner(const char *name, Oid newOwnerId)
+{
+ Oid subid;
+ HeapTuple tup;
+ Relation rel;
+ ObjectAddress address;
+
+ rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId,
+ CStringGetDatum(name));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription \"%s\" does not exist", name)));
+
+ subid = HeapTupleGetOid(tup);
+
+ AlterSubscriptionOwner_internal(rel, tup, newOwnerId);
+
+ ObjectAddressSet(address, SubscriptionRelationId, subid);
+
+ heap_freetuple(tup);
+
+ heap_close(rel, RowExclusiveLock);
+
+ return address;
+}
+
+/*
+ * Change subscription owner -- by OID
+ */
+void
+AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId)
+{
+ HeapTuple tup;
+ Relation rel;
+
+ rel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+ tup = SearchSysCacheCopy1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("subscription with OID %u does not exist", subid)));
+
+ AlterSubscriptionOwner_internal(rel, tup, newOwnerId);
+
+ heap_freetuple(tup);
+
+ heap_close(rel, RowExclusiveLock);
+}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 6ed2a3dc4d..c4b0011bdd 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -12055,6 +12055,18 @@ ATPrepChangePersistence(Relation rel, bool toLogged)
break;
}
+ /*
+	 * Check that the table is not part of any publication when changing it
+	 * to UNLOGGED, as unlogged tables cannot be published.
+ */
+ if (!toLogged &&
+ list_length(GetRelationPublications(RelationGetRelid(rel))) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot change table \"%s\" to unlogged because it is part of a publication",
+ RelationGetRelationName(rel)),
+ errdetail("Unlogged relations cannot be replicated.")));
+
/*
* Check existing foreign key constraints to preserve the invariant that
* permanent tables cannot reference unlogged ones. Self-referencing
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index c51415830a..2a2b7eb9bd 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \
execMain.o execParallel.o execProcnode.o execQual.o \
- execScan.o execTuples.o \
+ execReplication.o execScan.o execTuples.o \
execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \
nodeBitmapAnd.o nodeBitmapOr.o \
nodeBitmapHeapscan.o nodeBitmapIndexscan.o \
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index e6edcc06c2..0bc146ca47 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -43,6 +43,7 @@
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
+#include "catalog/pg_publication.h"
#include "commands/matview.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
@@ -1024,7 +1025,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation)
{
case RELKIND_RELATION:
case RELKIND_PARTITIONED_TABLE:
- /* OK */
+ CheckCmdReplicaIdentity(resultRel, operation);
break;
case RELKIND_SEQUENCE:
ereport(ERROR,
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
new file mode 100644
index 0000000000..a18ae512db
--- /dev/null
+++ b/src/backend/executor/execReplication.c
@@ -0,0 +1,553 @@
+/*-------------------------------------------------------------------------
+ *
+ * execReplication.c
+ * miscellaneous executor routines for logical replication
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/executor/execReplication.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "commands/trigger.h"
+#include "executor/executor.h"
+#include "nodes/nodeFuncs.h"
+#include "parser/parse_relation.h"
+#include "parser/parsetree.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+
+
+/*
+ * Set up a ScanKey for a search in the relation 'rel' for a tuple 'key' that
+ * is set up to match 'rel' (*NOT* idxrel!).
+ *
+ * Returns whether any column contains NULLs.
+ *
+ * This is not a generic routine; it expects idxrel to be the replica
+ * identity of rel and to meet all limitations associated with that.
+ */
+static bool
+build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel,
+ TupleTableSlot *searchslot)
+{
+ int attoff;
+ bool isnull;
+ Datum indclassDatum;
+ oidvector *opclass;
+ int2vector *indkey = &idxrel->rd_index->indkey;
+ bool hasnulls = false;
+
+ Assert(RelationGetReplicaIndex(rel) == RelationGetRelid(idxrel));
+
+ indclassDatum = SysCacheGetAttr(INDEXRELID, idxrel->rd_indextuple,
+ Anum_pg_index_indclass, &isnull);
+ Assert(!isnull);
+ opclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+ /* Build scankey for every attribute in the index. */
+ for (attoff = 0; attoff < RelationGetNumberOfAttributes(idxrel); attoff++)
+ {
+ Oid operator;
+ Oid opfamily;
+ RegProcedure regop;
+ int pkattno = attoff + 1;
+ int mainattno = indkey->values[attoff];
+ Oid optype = get_opclass_input_type(opclass->values[attoff]);
+
+ /*
+ * Load the operator info. We need this to get the equality operator
+ * function for the scan key.
+ */
+ opfamily = get_opclass_family(opclass->values[attoff]);
+
+ operator = get_opfamily_member(opfamily, optype,
+ optype,
+ BTEqualStrategyNumber);
+
+ if (!OidIsValid(operator))
+ elog(ERROR, "could not find member %d(%u,%u) of opfamily %u",
+ BTEqualStrategyNumber, optype, optype, opfamily);
+
+ regop = get_opcode(operator);
+
+ /* Initialize the scankey. */
+ ScanKeyInit(&skey[attoff],
+ pkattno,
+ BTEqualStrategyNumber,
+ regop,
+ searchslot->tts_values[mainattno - 1]);
+
+ /* Check for null value. */
+ if (searchslot->tts_isnull[mainattno - 1])
+ {
+ hasnulls = true;
+ skey[attoff].sk_flags |= SK_ISNULL;
+ }
+ }
+
+ return hasnulls;
+}
+
+/*
+ * Search the relation 'rel' for a tuple using the index.
+ *
+ * If a matching tuple is found, lock it with lockmode, fill the slot with its
+ * contents, and return true. Return false otherwise.
+ */
+bool
+RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
+ LockTupleMode lockmode,
+ TupleTableSlot *searchslot,
+ TupleTableSlot *outslot)
+{
+ HeapTuple scantuple;
+ ScanKeyData skey[INDEX_MAX_KEYS];
+ IndexScanDesc scan;
+ SnapshotData snap;
+ TransactionId xwait;
+ Relation idxrel;
+ bool found;
+
+	/* Open the index. */
+ idxrel = index_open(idxoid, RowExclusiveLock);
+
+ /* Start an index scan. */
+ InitDirtySnapshot(snap);
+ scan = index_beginscan(rel, idxrel, &snap,
+ RelationGetNumberOfAttributes(idxrel),
+ 0);
+
+ /* Build scan key. */
+ build_replindex_scan_key(skey, rel, idxrel, searchslot);
+
+retry:
+ found = false;
+
+ index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0);
+
+ /* Try to find the tuple */
+ if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ found = true;
+ ExecStoreTuple(scantuple, outslot, InvalidBuffer, false);
+ ExecMaterializeSlot(outslot);
+
+ xwait = TransactionIdIsValid(snap.xmin) ?
+ snap.xmin : snap.xmax;
+
+ /*
+ * If the tuple is locked, wait for locking transaction to finish
+ * and retry.
+ */
+ if (TransactionIdIsValid(xwait))
+ {
+ XactLockTableWait(xwait, NULL, NULL, XLTW_None);
+ goto retry;
+ }
+ }
+
+ /* Found tuple, try to lock it in the lockmode. */
+ if (found)
+ {
+ Buffer buf;
+ HeapUpdateFailureData hufd;
+ HTSU_Result res;
+ HeapTupleData locktup;
+
+ ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self);
+
+ PushActiveSnapshot(GetLatestSnapshot());
+
+ res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
+ lockmode,
+ false /* wait */,
+ false /* don't follow updates */,
+ &buf, &hufd);
+ /* the tuple slot already has the buffer pinned */
+ ReleaseBuffer(buf);
+
+ PopActiveSnapshot();
+
+ switch (res)
+ {
+ case HeapTupleMayBeUpdated:
+ break;
+ case HeapTupleUpdated:
+ /* XXX: Improve handling here */
+ ereport(LOG,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("concurrent update, retrying")));
+ goto retry;
+ case HeapTupleInvisible:
+ elog(ERROR, "attempted to lock invisible tuple");
+ default:
+ elog(ERROR, "unexpected heap_lock_tuple status: %u", res);
+ break;
+ }
+ }
+
+ index_endscan(scan);
+
+ /* Don't release lock until commit. */
+ index_close(idxrel, NoLock);
+
+ return found;
+}
+
+/*
+ * Compare the tuple and slot and check if they have equal values.
+ *
+ * We use binary datum comparison, which might return false negatives, but
+ * that's the best we can do here: there may be multiple notions of equality
+ * for a data type, and table columns don't specify which one to use.
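+ *
+ * For instance (an illustrative case, not one drawn from this code), two
+ * values of a case-insensitive text type that differ only in case are equal
+ * under that type's equality operator but compare as unequal here.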
+ */
+static bool
+tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot)
+{
+ Datum values[MaxTupleAttributeNumber];
+ bool isnull[MaxTupleAttributeNumber];
+ int attrnum;
+ Form_pg_attribute att;
+
+ heap_deform_tuple(tup, desc, values, isnull);
+
+ /* Check equality of the attributes. */
+ for (attrnum = 0; attrnum < desc->natts; attrnum++)
+ {
+ /*
+		 * If one value is NULL and the other is not, then they are
+		 * certainly not equal.
+ */
+ if (isnull[attrnum] != slot->tts_isnull[attrnum])
+ return false;
+
+ /*
+ * If both are NULL, they can be considered equal.
+ */
+ if (isnull[attrnum])
+ continue;
+
+ att = desc->attrs[attrnum];
+ if (!datumIsEqual(values[attrnum], slot->tts_values[attrnum],
+ att->attbyval, att->attlen))
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Search the relation 'rel' for a tuple using a sequential scan.
+ *
+ * If a matching tuple is found, lock it with lockmode, fill the slot with its
+ * contents, and return true. Return false otherwise.
+ *
+ * Note that this stops on the first matching tuple.
+ *
+ * This can obviously be quite slow on tables that have more than a few rows.
+ */
+bool
+RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
+ TupleTableSlot *searchslot, TupleTableSlot *outslot)
+{
+ HeapTuple scantuple;
+ HeapScanDesc scan;
+ SnapshotData snap;
+ TransactionId xwait;
+ bool found;
+ TupleDesc desc = RelationGetDescr(rel);
+
+ Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor));
+
+	/* Start a heap scan. */
+ InitDirtySnapshot(snap);
+ scan = heap_beginscan(rel, &snap, 0, NULL);
+
+retry:
+ found = false;
+
+ heap_rescan(scan, NULL);
+
+ /* Try to find the tuple */
+ while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+ {
+ if (!tuple_equals_slot(desc, scantuple, searchslot))
+ continue;
+
+ found = true;
+ ExecStoreTuple(scantuple, outslot, InvalidBuffer, false);
+ ExecMaterializeSlot(outslot);
+
+ xwait = TransactionIdIsValid(snap.xmin) ?
+ snap.xmin : snap.xmax;
+
+ /*
+ * If the tuple is locked, wait for locking transaction to finish
+ * and retry.
+ */
+ if (TransactionIdIsValid(xwait))
+ {
+ XactLockTableWait(xwait, NULL, NULL, XLTW_None);
+ goto retry;
+ }
+ }
+
+ /* Found tuple, try to lock it in the lockmode. */
+ if (found)
+ {
+ Buffer buf;
+ HeapUpdateFailureData hufd;
+ HTSU_Result res;
+ HeapTupleData locktup;
+
+ ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self);
+
+ PushActiveSnapshot(GetLatestSnapshot());
+
+ res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
+ lockmode,
+ false /* wait */,
+ false /* don't follow updates */,
+ &buf, &hufd);
+ /* the tuple slot already has the buffer pinned */
+ ReleaseBuffer(buf);
+
+ PopActiveSnapshot();
+
+ switch (res)
+ {
+ case HeapTupleMayBeUpdated:
+ break;
+ case HeapTupleUpdated:
+ /* XXX: Improve handling here */
+ ereport(LOG,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("concurrent update, retrying")));
+ goto retry;
+ case HeapTupleInvisible:
+ elog(ERROR, "attempted to lock invisible tuple");
+ default:
+ elog(ERROR, "unexpected heap_lock_tuple status: %u", res);
+ break;
+ }
+ }
+
+ heap_endscan(scan);
+
+ return found;
+}
+
+/*
+ * Insert the tuple represented in the slot into the relation, update the
+ * indexes, and execute any constraints and per-row triggers.
+ *
+ * Caller is responsible for opening the indexes.
+ */
+void
+ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
+{
+ bool skip_tuple = false;
+ HeapTuple tuple;
+ ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
+ Relation rel = resultRelInfo->ri_RelationDesc;
+
+ /* For now we support only tables. */
+ Assert(rel->rd_rel->relkind == RELKIND_RELATION);
+
+ CheckCmdReplicaIdentity(rel, CMD_INSERT);
+
+ /* BEFORE ROW INSERT Triggers */
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_insert_before_row)
+ {
+ slot = ExecBRInsertTriggers(estate, resultRelInfo, slot);
+
+ if (slot == NULL) /* "do nothing" */
+ skip_tuple = true;
+ }
+
+ if (!skip_tuple)
+ {
+ List *recheckIndexes = NIL;
+
+ /* Check the constraints of the tuple */
+ if (rel->rd_att->constr)
+ ExecConstraints(resultRelInfo, slot, slot, estate);
+
+		/* Materialize the slot into a tuple that we can insert. */
+ tuple = ExecMaterializeSlot(slot);
+
+ /* OK, store the tuple and create index entries for it */
+ simple_heap_insert(rel, tuple);
+
+ if (resultRelInfo->ri_NumIndices > 0)
+ recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+ estate, false, NULL,
+ NIL);
+
+ /* AFTER ROW INSERT Triggers */
+ ExecARInsertTriggers(estate, resultRelInfo, tuple,
+ recheckIndexes);
+
+ list_free(recheckIndexes);
+ }
+}
+
+/*
+ * Find the searchslot tuple and update it with data in the slot,
+ * update the indexes, and execute any constraints and per-row triggers.
+ *
+ * Caller is responsible for opening the indexes.
+ */
+void
+ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
+ TupleTableSlot *searchslot, TupleTableSlot *slot)
+{
+ bool skip_tuple = false;
+ HeapTuple tuple;
+ ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
+ Relation rel = resultRelInfo->ri_RelationDesc;
+
+ /* For now we support only tables. */
+ Assert(rel->rd_rel->relkind == RELKIND_RELATION);
+
+ CheckCmdReplicaIdentity(rel, CMD_UPDATE);
+
+	/* BEFORE ROW UPDATE Triggers */
+ if (resultRelInfo->ri_TrigDesc &&
+ resultRelInfo->ri_TrigDesc->trig_update_before_row)
+ {
+ slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
+ &searchslot->tts_tuple->t_self,
+ NULL, slot);
+
+ if (slot == NULL) /* "do nothing" */
+ skip_tuple = true;
+ }
+
+ if (!skip_tuple)
+ {
+ List *recheckIndexes = NIL;
+
+ /* Check the constraints of the tuple */
+ if (rel->rd_att->constr)
+ ExecConstraints(resultRelInfo, slot, slot, estate);
+
+		/* Materialize the slot into a tuple that we can write. */
+ tuple = ExecMaterializeSlot(slot);
+
+ /* OK, update the tuple and index entries for it */
+ simple_heap_update(rel, &searchslot->tts_tuple->t_self,
+ slot->tts_tuple);
+
+ if (resultRelInfo->ri_NumIndices > 0 &&
+ !HeapTupleIsHeapOnly(slot->tts_tuple))
+ recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+ estate, false, NULL,
+ NIL);
+
+ /* AFTER ROW UPDATE Triggers */
+ ExecARUpdateTriggers(estate, resultRelInfo,
+ &searchslot->tts_tuple->t_self,
+ NULL, tuple, recheckIndexes);
+
+ list_free(recheckIndexes);
+ }
+}
+
+/*
+ * Find the searchslot tuple and delete it, and execute any constraints
+ * and per-row triggers.
+ *
+ * Caller is responsible for opening the indexes.
+ */
+void
+ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
+ TupleTableSlot *searchslot)
+{
+ bool skip_tuple = false;
+ ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
+ Relation rel = resultRelInfo->ri_RelationDesc;
+
+ /* For now we support only tables. */
+ Assert(rel->rd_rel->relkind == RELKIND_RELATION);
+
+ CheckCmdReplicaIdentity(rel, CMD_DELETE);
+
+	/* BEFORE ROW DELETE Triggers */
+	if (resultRelInfo->ri_TrigDesc &&
+		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
+ {
+ skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
+ &searchslot->tts_tuple->t_self,
+ NULL);
+ }
+
+ if (!skip_tuple)
+ {
+ List *recheckIndexes = NIL;
+
+ /* OK, delete the tuple */
+ simple_heap_delete(rel, &searchslot->tts_tuple->t_self);
+
+ /* AFTER ROW DELETE Triggers */
+ ExecARDeleteTriggers(estate, resultRelInfo,
+ &searchslot->tts_tuple->t_self, NULL);
+
+ list_free(recheckIndexes);
+ }
+}
+
+/*
+ * Check if the command can be executed with the current replica identity.
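+ *
+ * A table that publishes updates or deletes must have a replica identity;
+ * one can be assigned with, for example (illustrative table and index
+ * names):
+ *
+ *     ALTER TABLE users REPLICA IDENTITY USING INDEX users_pkey;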
+ */
+void
+CheckCmdReplicaIdentity(Relation rel, CmdType cmd)
+{
+ PublicationActions *pubactions;
+
+ /* We only need to do checks for UPDATE and DELETE. */
+ if (cmd != CMD_UPDATE && cmd != CMD_DELETE)
+ return;
+
+	/* If the relation has a replica identity, we are always good. */
+ if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+ OidIsValid(RelationGetReplicaIndex(rel)))
+ return;
+
+ /*
+	 * This is either UPDATE or DELETE, and there is no replica identity.
+ *
+ * Check if the table publishes UPDATES or DELETES.
+ */
+ pubactions = GetRelationPublicationActions(rel);
+ if (cmd == CMD_UPDATE && pubactions->pubupdate)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot update table \"%s\" because it does not have replica identity and publishes updates",
+ RelationGetRelationName(rel)),
+ errhint("To enable updating the table, set REPLICA IDENTITY using ALTER TABLE.")));
+ else if (cmd == CMD_DELETE && pubactions->pubdelete)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("cannot delete from table \"%s\" because it does not have replica identity and publishes deletes",
+ RelationGetRelationName(rel)),
+ errhint("To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.")));
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index f871e9d4bb..30d733e57a 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -4286,6 +4286,69 @@ _copyPartitionCmd(const PartitionCmd *from)
return newnode;
}
+static CreatePublicationStmt *
+_copyCreatePublicationStmt(const CreatePublicationStmt *from)
+{
+ CreatePublicationStmt *newnode = makeNode(CreatePublicationStmt);
+
+ COPY_STRING_FIELD(pubname);
+ COPY_NODE_FIELD(options);
+ COPY_NODE_FIELD(tables);
+ COPY_SCALAR_FIELD(for_all_tables);
+
+ return newnode;
+}
+
+static AlterPublicationStmt *
+_copyAlterPublicationStmt(const AlterPublicationStmt *from)
+{
+ AlterPublicationStmt *newnode = makeNode(AlterPublicationStmt);
+
+ COPY_STRING_FIELD(pubname);
+ COPY_NODE_FIELD(options);
+ COPY_NODE_FIELD(tables);
+ COPY_SCALAR_FIELD(for_all_tables);
+ COPY_SCALAR_FIELD(tableAction);
+
+ return newnode;
+}
+
+static CreateSubscriptionStmt *
+_copyCreateSubscriptionStmt(const CreateSubscriptionStmt *from)
+{
+ CreateSubscriptionStmt *newnode = makeNode(CreateSubscriptionStmt);
+
+ COPY_STRING_FIELD(subname);
+ COPY_STRING_FIELD(conninfo);
+ COPY_NODE_FIELD(publication);
+ COPY_NODE_FIELD(options);
+
+ return newnode;
+}
+
+static AlterSubscriptionStmt *
+_copyAlterSubscriptionStmt(const AlterSubscriptionStmt *from)
+{
+ AlterSubscriptionStmt *newnode = makeNode(AlterSubscriptionStmt);
+
+ COPY_STRING_FIELD(subname);
+ COPY_NODE_FIELD(options);
+
+ return newnode;
+}
+
+static DropSubscriptionStmt *
+_copyDropSubscriptionStmt(const DropSubscriptionStmt *from)
+{
+ DropSubscriptionStmt *newnode = makeNode(DropSubscriptionStmt);
+
+ COPY_STRING_FIELD(subname);
+ COPY_SCALAR_FIELD(drop_slot);
+ COPY_SCALAR_FIELD(missing_ok);
+
+ return newnode;
+}
+
/* ****************************************************************
* pg_list.h copy functions
* ****************************************************************
@@ -5086,6 +5149,21 @@ copyObject(const void *from)
case T_AlterPolicyStmt:
retval = _copyAlterPolicyStmt(from);
break;
+ case T_CreatePublicationStmt:
+ retval = _copyCreatePublicationStmt(from);
+ break;
+ case T_AlterPublicationStmt:
+ retval = _copyAlterPublicationStmt(from);
+ break;
+ case T_CreateSubscriptionStmt:
+ retval = _copyCreateSubscriptionStmt(from);
+ break;
+ case T_AlterSubscriptionStmt:
+ retval = _copyAlterSubscriptionStmt(from);
+ break;
+ case T_DropSubscriptionStmt:
+ retval = _copyDropSubscriptionStmt(from);
+ break;
case T_A_Expr:
retval = _copyAExpr(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 78ed3c773e..55c73b7292 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2134,6 +2134,64 @@ _equalAlterTSConfigurationStmt(const AlterTSConfigurationStmt *a,
return true;
}
+static bool
+_equalCreatePublicationStmt(const CreatePublicationStmt *a,
+ const CreatePublicationStmt *b)
+{
+ COMPARE_STRING_FIELD(pubname);
+ COMPARE_NODE_FIELD(options);
+ COMPARE_NODE_FIELD(tables);
+ COMPARE_SCALAR_FIELD(for_all_tables);
+
+ return true;
+}
+
+static bool
+_equalAlterPublicationStmt(const AlterPublicationStmt *a,
+ const AlterPublicationStmt *b)
+{
+ COMPARE_STRING_FIELD(pubname);
+ COMPARE_NODE_FIELD(options);
+ COMPARE_NODE_FIELD(tables);
+ COMPARE_SCALAR_FIELD(for_all_tables);
+ COMPARE_SCALAR_FIELD(tableAction);
+
+ return true;
+}
+
+static bool
+_equalCreateSubscriptionStmt(const CreateSubscriptionStmt *a,
+ const CreateSubscriptionStmt *b)
+{
+ COMPARE_STRING_FIELD(subname);
+ COMPARE_STRING_FIELD(conninfo);
+ COMPARE_NODE_FIELD(publication);
+ COMPARE_NODE_FIELD(options);
+
+ return true;
+}
+
+static bool
+_equalAlterSubscriptionStmt(const AlterSubscriptionStmt *a,
+ const AlterSubscriptionStmt *b)
+{
+ COMPARE_STRING_FIELD(subname);
+ COMPARE_NODE_FIELD(options);
+
+ return true;
+}
+
+static bool
+_equalDropSubscriptionStmt(const DropSubscriptionStmt *a,
+ const DropSubscriptionStmt *b)
+{
+ COMPARE_STRING_FIELD(subname);
+ COMPARE_SCALAR_FIELD(drop_slot);
+ COMPARE_SCALAR_FIELD(missing_ok);
+
+ return true;
+}
+
static bool
_equalCreatePolicyStmt(const CreatePolicyStmt *a, const CreatePolicyStmt *b)
{
@@ -3349,6 +3407,21 @@ equal(const void *a, const void *b)
case T_AlterPolicyStmt:
retval = _equalAlterPolicyStmt(a, b);
break;
+ case T_CreatePublicationStmt:
+ retval = _equalCreatePublicationStmt(a, b);
+ break;
+ case T_AlterPublicationStmt:
+ retval = _equalAlterPublicationStmt(a, b);
+ break;
+ case T_CreateSubscriptionStmt:
+ retval = _equalCreateSubscriptionStmt(a, b);
+ break;
+ case T_AlterSubscriptionStmt:
+ retval = _equalAlterSubscriptionStmt(a, b);
+ break;
+ case T_DropSubscriptionStmt:
+ retval = _equalDropSubscriptionStmt(a, b);
+ break;
case T_A_Expr:
retval = _equalAExpr(a, b);
break;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index e61ba06efe..a8e35feccc 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -280,6 +280,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
DropOwnedStmt ReassignOwnedStmt
AlterTSConfigurationStmt AlterTSDictionaryStmt
CreateMatViewStmt RefreshMatViewStmt CreateAmStmt
+ CreatePublicationStmt AlterPublicationStmt
+ CreateSubscriptionStmt AlterSubscriptionStmt DropSubscriptionStmt
%type select_no_parens select_with_parens select_clause
simple_select values_clause
@@ -334,6 +336,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
database_name access_method_clause access_method attr_name
name cursor_name file_name
index_name opt_index_name cluster_index_specification
+ def_key
%type func_name handler_name qual_Op qual_all_Op subquery_Op
opt_class opt_inline_handler opt_validator validator_clause
@@ -391,10 +394,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
relation_expr_list dostmt_opt_list
transform_element_list transform_type_list
TriggerTransitions TriggerReferencing
+ publication_name_list
%type group_by_list
%type group_by_item empty_grouping_set rollup_clause cube_clause
%type grouping_sets_clause
+%type opt_publication_for_tables publication_for_tables
+%type publication_name_item
%type opt_fdw_options fdw_options
%type fdw_option
@@ -407,7 +413,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type arg_class
%type func_return func_type
-%type opt_trusted opt_restart_seqs
+%type opt_trusted opt_restart_seqs opt_drop_slot
%type OptTemp
%type OptNoLog
%type OnCommitOption
@@ -647,7 +653,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
PARALLEL PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POLICY
POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY
- PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM
+ PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM PUBLICATION
QUOTE
@@ -658,9 +664,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES
SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW
- SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P START
- STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
- SYMMETRIC SYSID SYSTEM_P
+ SIMILAR SIMPLE SKIP SLOT SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P
+ START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P
+ SUBSCRIPTION SUBSTRING SYMMETRIC SYSID SYSTEM_P
TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN
TIME TIMESTAMP TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
@@ -822,8 +828,10 @@ stmt :
| AlterTableStmt
| AlterTblSpcStmt
| AlterCompositeTypeStmt
+ | AlterPublicationStmt
| AlterRoleSetStmt
| AlterRoleStmt
+ | AlterSubscriptionStmt
| AlterTSConfigurationStmt
| AlterTSDictionaryStmt
| AlterUserMappingStmt
@@ -851,12 +859,14 @@ stmt :
| CreateMatViewStmt
| CreateOpClassStmt
| CreateOpFamilyStmt
+ | CreatePublicationStmt
| AlterOpFamilyStmt
| CreatePolicyStmt
| CreatePLangStmt
| CreateSchemaStmt
| CreateSeqStmt
| CreateStmt
+ | CreateSubscriptionStmt
| CreateTableSpaceStmt
| CreateTransformStmt
| CreateTrigStmt
@@ -883,6 +893,7 @@ stmt :
| DropPLangStmt
| DropRuleStmt
| DropStmt
+ | DropSubscriptionStmt
| DropTableSpaceStmt
| DropTransformStmt
| DropTrigStmt
@@ -5613,16 +5624,21 @@ def_list: def_elem { $$ = list_make1($1); }
| def_list ',' def_elem { $$ = lappend($1, $3); }
;
-def_elem: ColLabel '=' def_arg
+def_elem: def_key '=' def_arg
{
$$ = makeDefElem($1, (Node *) $3, @1);
}
- | ColLabel
+ | def_key
{
$$ = makeDefElem($1, NULL, @1);
}
;
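+
+/*
+ * def_key accepts either one ColLabel or two; in the two-word case the words
+ * are joined into a single space-separated key.  This is what lets option
+ * lists spell two-word option names (for example, a hypothetical key such as
+ * "nocreate slot" would be passed through as that single string).
+ */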
+def_key:
+ ColLabel { $$ = $1; }
+ | ColLabel ColLabel { $$ = psprintf("%s %s", $1, $2); }
+ ;
+
/* Note: any simple identifier will be returned as a type name! */
def_arg: func_type { $$ = (Node *)$1; }
| reserved_keyword { $$ = (Node *)makeString(pstrdup($1)); }
@@ -6073,6 +6089,7 @@ drop_type: TABLE { $$ = OBJECT_TABLE; }
| TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; }
| TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; }
| TEXT_P SEARCH CONFIGURATION { $$ = OBJECT_TSCONFIGURATION; }
+ | PUBLICATION { $$ = OBJECT_PUBLICATION; }
;
any_name_list:
@@ -8933,8 +8950,236 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
n->newowner = $7;
$$ = (Node *)n;
}
+ | ALTER PUBLICATION name OWNER TO RoleSpec
+ {
+ AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+ n->objectType = OBJECT_PUBLICATION;
+ n->object = list_make1(makeString($3));
+ n->newowner = $6;
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name OWNER TO RoleSpec
+ {
+ AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+ n->objectType = OBJECT_SUBSCRIPTION;
+ n->object = list_make1(makeString($3));
+ n->newowner = $6;
+ $$ = (Node *)n;
+ }
+ ;
+
+
+/*****************************************************************************
+ *
+ * CREATE PUBLICATION name [ FOR TABLE ] [ WITH options ]
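+ *
+ * A couple of illustrative invocations (publication and table names are
+ * hypothetical):
+ *
+ *     CREATE PUBLICATION mypub FOR TABLE users, departments;
+ *     CREATE PUBLICATION allpub FOR ALL TABLES;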
+ *
+ *****************************************************************************/
+
+CreatePublicationStmt:
+ CREATE PUBLICATION name opt_publication_for_tables opt_definition
+ {
+ CreatePublicationStmt *n = makeNode(CreatePublicationStmt);
+ n->pubname = $3;
+ n->options = $5;
+ if ($4 != NULL)
+ {
+ /* FOR TABLE */
+ if (IsA($4, List))
+ n->tables = (List *)$4;
+ /* FOR ALL TABLES */
+ else
+ n->for_all_tables = TRUE;
+ }
+ $$ = (Node *)n;
+ }
;
+opt_publication_for_tables:
+ publication_for_tables { $$ = $1; }
+ | /* EMPTY */ { $$ = NULL; }
+ ;
+
+publication_for_tables:
+ FOR TABLE relation_expr_list
+ {
+ $$ = (Node *) $3;
+ }
+ | FOR ALL TABLES
+ {
+ $$ = (Node *) makeInteger(TRUE);
+ }
+ ;
+
+/*****************************************************************************
+ *
+ * ALTER PUBLICATION name [ WITH ] options
+ *
+ * ALTER PUBLICATION name ADD TABLE table [, table2]
+ *
+ * ALTER PUBLICATION name DROP TABLE table [, table2]
+ *
+ * ALTER PUBLICATION name SET TABLE table [, table2]
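+ *
+ * For example (illustrative names):
+ *
+ *     ALTER PUBLICATION mypub ADD TABLE orders;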
+ *
+ *****************************************************************************/
+
+AlterPublicationStmt:
+ ALTER PUBLICATION name WITH definition
+ {
+ AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+ n->pubname = $3;
+ n->options = $5;
+ $$ = (Node *)n;
+ }
+ | ALTER PUBLICATION name ADD_P TABLE relation_expr_list
+ {
+ AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+ n->pubname = $3;
+ n->tables = $6;
+ n->tableAction = DEFELEM_ADD;
+ $$ = (Node *)n;
+ }
+ | ALTER PUBLICATION name SET TABLE relation_expr_list
+ {
+ AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+ n->pubname = $3;
+ n->tables = $6;
+ n->tableAction = DEFELEM_SET;
+ $$ = (Node *)n;
+ }
+ | ALTER PUBLICATION name DROP TABLE relation_expr_list
+ {
+ AlterPublicationStmt *n = makeNode(AlterPublicationStmt);
+ n->pubname = $3;
+ n->tables = $6;
+ n->tableAction = DEFELEM_DROP;
+ $$ = (Node *)n;
+ }
+ ;
+
+/*****************************************************************************
+ *
+ * CREATE SUBSCRIPTION name ...
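+ *
+ * An illustrative invocation (connection string, subscription, and
+ * publication names are hypothetical):
+ *
+ *     CREATE SUBSCRIPTION mysub
+ *         CONNECTION 'host=publisher dbname=postgres'
+ *         PUBLICATION mypub;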
+ *
+ *****************************************************************************/
+
+CreateSubscriptionStmt:
+ CREATE SUBSCRIPTION name CONNECTION Sconst PUBLICATION publication_name_list opt_definition
+ {
+ CreateSubscriptionStmt *n =
+ makeNode(CreateSubscriptionStmt);
+ n->subname = $3;
+ n->conninfo = $5;
+ n->publication = $7;
+ n->options = $8;
+ $$ = (Node *)n;
+ }
+ ;
+
+publication_name_list:
+ publication_name_item
+ {
+ $$ = list_make1($1);
+ }
+ | publication_name_list ',' publication_name_item
+ {
+ $$ = lappend($1, $3);
+ }
+ ;
+
+publication_name_item:
+ ColLabel { $$ = makeString($1); };
+
+/*****************************************************************************
+ *
+ * ALTER SUBSCRIPTION name [ WITH ] options
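+ *
+ * For example (illustrative names):
+ *
+ *     ALTER SUBSCRIPTION mysub SET PUBLICATION otherpub;
+ *     ALTER SUBSCRIPTION mysub DISABLE;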
+ *
+ *****************************************************************************/
+
+AlterSubscriptionStmt:
+ ALTER SUBSCRIPTION name WITH definition
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->subname = $3;
+ n->options = $5;
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name CONNECTION Sconst
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->subname = $3;
+ n->options = list_make1(makeDefElem("conninfo",
+ (Node *)makeString($5), @1));
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->subname = $3;
+ n->options = list_make1(makeDefElem("publication",
+ (Node *)$6, @1));
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name ENABLE_P
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->subname = $3;
+ n->options = list_make1(makeDefElem("enabled",
+ (Node *)makeInteger(TRUE), @1));
+ $$ = (Node *)n;
+ }
+ | ALTER SUBSCRIPTION name DISABLE_P
+ {
+ AlterSubscriptionStmt *n =
+ makeNode(AlterSubscriptionStmt);
+ n->subname = $3;
+ n->options = list_make1(makeDefElem("enabled",
+ (Node *)makeInteger(FALSE), @1));
+ $$ = (Node *)n;
+ } ;
+
+/*****************************************************************************
+ *
+ * DROP SUBSCRIPTION [ IF EXISTS ] name
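+ *
+ * For example (illustrative name; NODROP SLOT leaves the remote replication
+ * slot in place):
+ *
+ *     DROP SUBSCRIPTION IF EXISTS mysub NODROP SLOT;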
+ *
+ *****************************************************************************/
+
+DropSubscriptionStmt: DROP SUBSCRIPTION name opt_drop_slot
+ {
+ DropSubscriptionStmt *n = makeNode(DropSubscriptionStmt);
+ n->subname = $3;
+ n->drop_slot = $4;
+ n->missing_ok = false;
+ $$ = (Node *) n;
+ }
+ | DROP SUBSCRIPTION IF_P EXISTS name opt_drop_slot
+ {
+ DropSubscriptionStmt *n = makeNode(DropSubscriptionStmt);
+ n->subname = $5;
+ n->drop_slot = $6;
+ n->missing_ok = true;
+ $$ = (Node *) n;
+ }
+ ;
+
+opt_drop_slot:
+ IDENT SLOT
+ {
+ if (strcmp($1, "drop") == 0)
+ $$ = TRUE;
+ else if (strcmp($1, "nodrop") == 0)
+ $$ = FALSE;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized option \"%s\"", $1),
+ parser_errposition(@1)));
+ }
+ | /*EMPTY*/ { $$ = TRUE; }
+ ;
/*****************************************************************************
*
@@ -14201,6 +14446,7 @@ unreserved_keyword:
| PROCEDURAL
| PROCEDURE
| PROGRAM
+ | PUBLICATION
| QUOTE
| RANGE
| READ
@@ -14244,6 +14490,7 @@ unreserved_keyword:
| SHOW
| SIMPLE
| SKIP
+ | SLOT
| SNAPSHOT
| SQL_P
| STABLE
@@ -14256,6 +14503,7 @@ unreserved_keyword:
| STORAGE
| STRICT_P
| STRIP_P
+ | SUBSCRIPTION
| SYSID
| SYSTEM_P
| TABLES
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index 61d3170b83..cd99b0b392 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -20,6 +20,7 @@
#include "port/atomics.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/postmaster.h"
+#include "replication/logicallauncher.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/latch.h"
@@ -107,6 +108,15 @@ struct BackgroundWorkerHandle
static BackgroundWorkerArray *BackgroundWorkerData;
+/*
+ * List of workers that are allowed to be started outside of
+ * shared_preload_libraries.
+ */
+static const bgworker_main_type InternalBGWorkers[] = {
+ ApplyLauncherMain,
+ NULL
+};
+
/*
* Calculate shared memory needed.
*/
@@ -761,12 +771,23 @@ RegisterBackgroundWorker(BackgroundWorker *worker)
{
RegisteredBgWorker *rw;
static int numworkers = 0;
+ bool internal = false;
+ int i;
if (!IsUnderPostmaster)
ereport(DEBUG1,
(errmsg("registering background worker \"%s\"", worker->bgw_name)));
- if (!process_shared_preload_libraries_in_progress)
+ for (i = 0; InternalBGWorkers[i]; i++)
+ {
+ if (worker->bgw_main == InternalBGWorkers[i])
+ {
+ internal = true;
+ break;
+ }
+ }
+
+ if (!process_shared_preload_libraries_in_progress && !internal)
{
if (!IsUnderPostmaster)
ereport(LOG,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index f37a0bfaaf..7176cf1bbe 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3303,6 +3303,12 @@ pgstat_get_wait_activity(WaitEventActivity w)
case WAIT_EVENT_WAL_WRITER_MAIN:
event_name = "WalWriterMain";
break;
+ case WAIT_EVENT_LOGICAL_LAUNCHER_MAIN:
+ event_name = "LogicalLauncherMain";
+ break;
+ case WAIT_EVENT_LOGICAL_APPLY_MAIN:
+ event_name = "LogicalApplyMain";
+ break;
/* no default case, so that compiler will warn */
}
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 5be30b0ee1..8d99c7a0d4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -113,6 +113,7 @@
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
+#include "replication/logicallauncher.h"
#include "replication/walsender.h"
#include "storage/fd.h"
#include "storage/ipc.h"
@@ -941,6 +942,14 @@ PostmasterMain(int argc, char *argv[])
}
#endif
+ /*
+ * Register the apply launcher. Since it registers a background worker,
+ * it needs to be called before InitializeMaxBackends(), and it's probably
+	 * a good idea to call it before any modules have had a chance to take
+	 * the background worker slots.
+ */
+ ApplyLauncherRegister();
+
/*
* process any libraries that should be preloaded at postmaster start
*/
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7671b166ed..7df3698afb 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -24,9 +24,11 @@
#include "access/xlog.h"
#include "miscadmin.h"
#include "pgstat.h"
+#include "replication/logicalproto.h"
#include "replication/walreceiver.h"
#include "storage/proc.h"
#include "utils/builtins.h"
+#include "utils/pg_lsn.h"
PG_MODULE_MAGIC;
@@ -44,26 +46,35 @@ struct WalReceiverConn
/* Prototypes for interface functions */
static WalReceiverConn *libpqrcv_connect(const char *conninfo,
- bool logical, const char *appname);
+ bool logical, const char *appname,
+ char **err);
+static void libpqrcv_check_conninfo(const char *conninfo);
static char *libpqrcv_get_conninfo(WalReceiverConn *conn);
static char *libpqrcv_identify_system(WalReceiverConn *conn,
- TimeLineID *primary_tli);
+ TimeLineID *primary_tli,
+ int *server_version);
static void libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn,
TimeLineID tli, char **filename,
char **content, int *len);
static bool libpqrcv_startstreaming(WalReceiverConn *conn,
- TimeLineID tli, XLogRecPtr startpoint,
- const char *slotname);
+ const WalRcvStreamOptions *options);
static void libpqrcv_endstreaming(WalReceiverConn *conn,
TimeLineID *next_tli);
static int libpqrcv_receive(WalReceiverConn *conn, char **buffer,
pgsocket *wait_fd);
static void libpqrcv_send(WalReceiverConn *conn, const char *buffer,
int nbytes);
+static char *libpqrcv_create_slot(WalReceiverConn *conn,
+ const char *slotname,
+ bool temporary,
+ XLogRecPtr *lsn);
+static bool libpqrcv_command(WalReceiverConn *conn,
+ const char *cmd, char **err);
static void libpqrcv_disconnect(WalReceiverConn *conn);
static WalReceiverFunctionsType PQWalReceiverFunctions = {
libpqrcv_connect,
+ libpqrcv_check_conninfo,
libpqrcv_get_conninfo,
libpqrcv_identify_system,
libpqrcv_readtimelinehistoryfile,
@@ -71,11 +82,14 @@ static WalReceiverFunctionsType PQWalReceiverFunctions = {
libpqrcv_endstreaming,
libpqrcv_receive,
libpqrcv_send,
+ libpqrcv_create_slot,
+ libpqrcv_command,
libpqrcv_disconnect
};
/* Prototypes for private functions */
static PGresult *libpqrcv_PQexec(PGconn *streamConn, const char *query);
+static char *stringlist_to_identifierstr(PGconn *conn, List *strings);
/*
* Module initialization function
@@ -90,9 +104,12 @@ _PG_init(void)
/*
* Establish the connection to the primary server for XLOG streaming
+ *
+ * Returns NULL on error, filling *err with a palloc'ed error message.
*/
static WalReceiverConn *
-libpqrcv_connect(const char *conninfo, bool logical, const char *appname)
+libpqrcv_connect(const char *conninfo, bool logical, const char *appname,
+ char **err)
{
WalReceiverConn *conn;
const char *keys[5];
@@ -123,14 +140,34 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname)
conn = palloc0(sizeof(WalReceiverConn));
conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
if (PQstatus(conn->streamConn) != CONNECTION_OK)
- ereport(ERROR,
- (errmsg("could not connect to the primary server: %s",
- PQerrorMessage(conn->streamConn))));
+ {
+ *err = pstrdup(PQerrorMessage(conn->streamConn));
+ return NULL;
+ }
+
conn->logical = logical;
return conn;
}
+/*
+ * Validate connection info string (just try to parse it)
+ */
+static void
+libpqrcv_check_conninfo(const char *conninfo)
+{
+ PQconninfoOption *opts = NULL;
+ char *err = NULL;
+
+ opts = PQconninfoParse(conninfo, &err);
+ if (opts == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid connection string syntax: %s", err)));
+
+ PQconninfoFree(opts);
+}
+
/*
* Return a user-displayable conninfo string. Any security-sensitive fields
* are obfuscated.
@@ -185,7 +222,8 @@ libpqrcv_get_conninfo(WalReceiverConn *conn)
* timeline ID of the primary.
*/
static char *
-libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
+libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli,
+ int *server_version)
{
PGresult *res;
char *primary_sysid;
@@ -218,11 +256,13 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
*primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0);
PQclear(res);
+ *server_version = PQserverVersion(conn->streamConn);
+
return primary_sysid;
}
/*
- * Start streaming WAL data from given startpoint and timeline.
+ * Start streaming WAL data using the given streaming options.
*
* Returns true if we switched successfully to copy-both mode. False
* means the server received the command and executed it successfully, but
@@ -233,27 +273,54 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli)
*/
static bool
libpqrcv_startstreaming(WalReceiverConn *conn,
- TimeLineID tli, XLogRecPtr startpoint,
- const char *slotname)
+ const WalRcvStreamOptions *options)
{
StringInfoData cmd;
PGresult *res;
- Assert(!conn->logical);
+ Assert(options->logical == conn->logical);
+ Assert(options->slotname || !options->logical);
initStringInfo(&cmd);
- /* Start streaming from the point requested by startup process */
- if (slotname != NULL)
- appendStringInfo(&cmd,
- "START_REPLICATION SLOT \"%s\" %X/%X TIMELINE %u",
- slotname,
- (uint32) (startpoint >> 32), (uint32) startpoint,
- tli);
+ /* Build the command. */
+ appendStringInfoString(&cmd, "START_REPLICATION");
+ if (options->slotname != NULL)
+ appendStringInfo(&cmd, " SLOT \"%s\"",
+ options->slotname);
+
+ if (options->logical)
+ appendStringInfo(&cmd, " LOGICAL");
+
+ appendStringInfo(&cmd, " %X/%X",
+ (uint32) (options->startpoint >> 32),
+ (uint32) options->startpoint);
+
+ /*
+	 * Additional options differ depending on whether we are doing logical
+	 * or physical replication.
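+	 *
+	 * For a logical stream the assembled command ends up looking roughly
+	 * like this (illustrative slot, LSN, and publication values):
+	 *
+	 *     START_REPLICATION SLOT "mysub" LOGICAL 0/1A2B3C4
+	 *         (proto_version '1', publication_names '"mypub"')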
+ */
+ if (options->logical)
+ {
+ char *pubnames_str;
+ List *pubnames;
+
+ appendStringInfoString(&cmd, " (");
+ appendStringInfo(&cmd, "proto_version '%u'",
+ options->proto.logical.proto_version);
+ pubnames = options->proto.logical.publication_names;
+ pubnames_str = stringlist_to_identifierstr(conn->streamConn, pubnames);
+ appendStringInfo(&cmd, ", publication_names %s",
+ PQescapeLiteral(conn->streamConn, pubnames_str,
+ strlen(pubnames_str)));
+ appendStringInfoChar(&cmd, ')');
+ pfree(pubnames_str);
+ }
else
- appendStringInfo(&cmd, "START_REPLICATION %X/%X TIMELINE %u",
- (uint32) (startpoint >> 32), (uint32) startpoint,
- tli);
+ appendStringInfo(&cmd, " TIMELINE %u",
+ options->proto.physical.startpointTLI);
+
+ /* Start streaming. */
res = libpqrcv_PQexec(conn->streamConn, cmd.data);
pfree(cmd.data);
@@ -577,3 +644,107 @@ libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes)
(errmsg("could not send data to WAL stream: %s",
PQerrorMessage(conn->streamConn))));
}
+
+/*
+ * Create a new replication slot.
+ *
+ * Returns the name of the exported snapshot for a logical slot, or NULL for
+ * a physical slot.
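+ *
+ * The assembled command looks like this (illustrative name; TEMPORARY and
+ * the LOGICAL clause are added only when requested):
+ *
+ *     CREATE_REPLICATION_SLOT "mysub" TEMPORARY LOGICAL pgoutput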
+ */
+static char *
+libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname,
+ bool temporary, XLogRecPtr *lsn)
+{
+ PGresult *res;
+ StringInfoData cmd;
+ char *snapshot;
+
+ initStringInfo(&cmd);
+
+ appendStringInfo(&cmd, "CREATE_REPLICATION_SLOT \"%s\" ", slotname);
+
+ if (temporary)
+ appendStringInfo(&cmd, "TEMPORARY ");
+
+ if (conn->logical)
+ appendStringInfo(&cmd, "LOGICAL pgoutput");
+
+ res = libpqrcv_PQexec(conn->streamConn, cmd.data);
+ pfree(cmd.data);
+
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ {
+ PQclear(res);
+ ereport(ERROR,
+ (errmsg("could not create replication slot \"%s\": %s",
+ slotname, PQerrorMessage(conn->streamConn))));
+ }
+
+ *lsn = DatumGetLSN(DirectFunctionCall1Coll(pg_lsn_in, InvalidOid,
+ CStringGetDatum(PQgetvalue(res, 0, 1))));
+ if (!PQgetisnull(res, 0, 2))
+ snapshot = pstrdup(PQgetvalue(res, 0, 2));
+ else
+ snapshot = NULL;
+
+ PQclear(res);
+
+ return snapshot;
+}
+
+/*
+ * Run a command.
+ *
+ * Returns true if the command succeeded; otherwise fills *err with a
+ * palloc'ed error message and returns false.
+ */
+static bool
+libpqrcv_command(WalReceiverConn *conn, const char *cmd, char **err)
+{
+ PGresult *res;
+
+ res = libpqrcv_PQexec(conn->streamConn, cmd);
+
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ {
+ PQclear(res);
+ *err = pstrdup(PQerrorMessage(conn->streamConn));
+ return false;
+ }
+
+ PQclear(res);
+
+ return true;
+}
+
+/*
+ * Given a List of strings, return it as a single comma-separated
+ * string, quoting identifiers as needed.
+ *
+ * This is essentially the reverse of SplitIdentifierString.
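+ *
+ * For example, the list ("foo", "Bar baz") would come back as the string
+ * foo,"Bar baz" (quoting applied only where needed).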
+ *
+ * The caller should free the result.
+ */
+static char *
+stringlist_to_identifierstr(PGconn *conn, List *strings)
+{
+ ListCell *lc;
+ StringInfoData res;
+ bool first = true;
+
+ initStringInfo(&res);
+
+ foreach (lc, strings)
+ {
+ char *val = strVal(lfirst(lc));
+
+ if (first)
+ first = false;
+ else
+ appendStringInfoChar(&res, ',');
+
+ appendStringInfoString(&res,
+ PQescapeIdentifier(conn, val, strlen(val)));
+ }
+
+ return res.data;
+}
diff --git a/src/backend/replication/logical/Makefile b/src/backend/replication/logical/Makefile
index 1d7ca062d1..259befa4e6 100644
--- a/src/backend/replication/logical/Makefile
+++ b/src/backend/replication/logical/Makefile
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
-OBJS = decode.o logical.o logicalfuncs.o message.o origin.o reorderbuffer.o \
- snapbuild.o
+OBJS = decode.o launcher.o logical.o logicalfuncs.o message.o origin.o \
+ proto.o relation.o reorderbuffer.o snapbuild.o worker.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
new file mode 100644
index 0000000000..b5240dcede
--- /dev/null
+++ b/src/backend/replication/logical/launcher.c
@@ -0,0 +1,759 @@
+/*-------------------------------------------------------------------------
+ * launcher.c
+ * PostgreSQL logical replication worker launcher process
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/launcher.c
+ *
+ * NOTES
+ * This module contains the logical replication worker launcher which
+ * uses the background worker infrastructure to start the logical
+ * replication workers for every enabled subscription.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+
+#include "access/heapam.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/pg_subscription.h"
+
+#include "libpq/pqsignal.h"
+
+#include "postmaster/bgworker.h"
+#include "postmaster/fork_process.h"
+#include "postmaster/postmaster.h"
+
+#include "replication/logicallauncher.h"
+#include "replication/logicalworker.h"
+#include "replication/slot.h"
+#include "replication/worker_internal.h"
+
+#include "storage/ipc.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/procsignal.h"
+
+#include "tcop/tcopprot.h"
+
+#include "utils/memutils.h"
+#include "utils/pg_lsn.h"
+#include "utils/ps_status.h"
+#include "utils/timeout.h"
+#include "utils/snapmgr.h"
+
+/* max sleep time between cycles (3min) */
+#define DEFAULT_NAPTIME_PER_CYCLE 180000L
+
+int max_logical_replication_workers = 4;
+LogicalRepWorker *MyLogicalRepWorker = NULL;
+
+typedef struct LogicalRepCtxStruct
+{
+ /* Supervisor process. */
+ pid_t launcher_pid;
+
+ /* Background workers. */
+ LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
+} LogicalRepCtxStruct;
+
+LogicalRepCtxStruct *LogicalRepCtx;
+
+static void logicalrep_worker_onexit(int code, Datum arg);
+static void logicalrep_worker_detach(void);
+
+bool got_SIGTERM = false;
+static bool on_commit_laucher_wakeup = false;
+
+Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);
+
+
+/*
+ * Load the list of subscriptions.
+ *
+ * Only the fields interesting for worker start/stop functions are filled for
+ * each subscription.
+ */
+static List *
+get_subscription_list(void)
+{
+ List *res = NIL;
+ Relation rel;
+ HeapScanDesc scan;
+ HeapTuple tup;
+ MemoryContext resultcxt;
+
+ /* This is the context that we will allocate our output data in */
+ resultcxt = CurrentMemoryContext;
+
+ /*
+	 * Start a transaction so we can access pg_subscription, and get a snapshot.
+ * We don't have a use for the snapshot itself, but we're interested in
+ * the secondary effect that it sets RecentGlobalXmin. (This is critical
+ * for anything that reads heap pages, because HOT may decide to prune
+ * them even if the process doesn't attempt to modify any tuples.)
+ */
+ StartTransactionCommand();
+ (void) GetTransactionSnapshot();
+
+ rel = heap_open(SubscriptionRelationId, AccessShareLock);
+ scan = heap_beginscan_catalog(rel, 0, NULL);
+
+ while (HeapTupleIsValid(tup = heap_getnext(scan, ForwardScanDirection)))
+ {
+ Form_pg_subscription subform = (Form_pg_subscription) GETSTRUCT(tup);
+ Subscription *sub;
+ MemoryContext oldcxt;
+
+ /*
+ * Allocate our results in the caller's context, not the
+ * transaction's. We do this inside the loop, and restore the original
+ * context at the end, so that leaky things like heap_getnext() are
+ * not called in a potentially long-lived context.
+ */
+ oldcxt = MemoryContextSwitchTo(resultcxt);
+
+ sub = (Subscription *) palloc(sizeof(Subscription));
+ sub->oid = HeapTupleGetOid(tup);
+ sub->dbid = subform->subdbid;
+ sub->owner = subform->subowner;
+ sub->enabled = subform->subenabled;
+ sub->name = pstrdup(NameStr(subform->subname));
+
+ /* We don't fill fields we are not interested in. */
+ sub->conninfo = NULL;
+ sub->slotname = NULL;
+ sub->publications = NIL;
+
+ res = lappend(res, sub);
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ heap_endscan(scan);
+ heap_close(rel, AccessShareLock);
+
+ CommitTransactionCommand();
+
+ return res;
+}
+
+/*
+ * Wait for a background worker to start up and attach to the shmem context.
+ *
+ * This is like WaitForBackgroundWorkerStartup(), except that we wait for the
+ * worker to attach, not just to start, and we simply exit if the postmaster
+ * dies.
+ */
+static bool
+WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
+ BackgroundWorkerHandle *handle)
+{
+ BgwHandleStatus status;
+ int rc;
+
+ for (;;)
+ {
+ pid_t pid;
+
+ CHECK_FOR_INTERRUPTS();
+
+ status = GetBackgroundWorkerPid(handle, &pid);
+
+ /*
+ * Worker started and attached to our shmem. This check is safe
+		 * because only the launcher ever starts the workers, so nobody can
+		 * steal the worker slot.
+ */
+ if (status == BGWH_STARTED && worker->proc)
+ return true;
+ /* Worker didn't start or died before attaching to our shmem. */
+ if (status == BGWH_STOPPED)
+ return false;
+
+ /*
+		 * We need a timeout because we generally don't get notified via
+		 * latch about the worker attaching.
+ */
+ rc = WaitLatch(MyLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 1000L, WAIT_EVENT_BGWORKER_STARTUP);
+
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(MyLatch);
+ }
+
+ return false;
+}
+
+/*
+ * Walks the workers array and searches for one that matches the given
+ * subscription id.
+ */
+LogicalRepWorker *
+logicalrep_worker_find(Oid subid)
+{
+ int i;
+ LogicalRepWorker *res = NULL;
+
+ Assert(LWLockHeldByMe(LogicalRepWorkerLock));
+ /* Search for attached worker for a given subscription id. */
+ for (i = 0; i < max_logical_replication_workers; i++)
+ {
+ LogicalRepWorker *w = &LogicalRepCtx->workers[i];
+ if (w->subid == subid && w->proc && IsBackendPid(w->proc->pid))
+ {
+ res = w;
+ break;
+ }
+ }
+
+ return res;
+}
+
+/*
+ * Start new apply background worker.
+ */
+void
+logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid)
+{
+ BackgroundWorker bgw;
+ BackgroundWorkerHandle *bgw_handle;
+ int slot;
+ LogicalRepWorker *worker = NULL;
+
+ ereport(LOG,
+ (errmsg("starting logical replication worker for subscription \"%s\"",
+ subname)));
+
+ /* Report this after the initial starting message for consistency. */
+ if (max_replication_slots == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+ errmsg("cannot start logical replication workers when max_replication_slots = 0")));
+
+ /*
+	 * We need to do the modification of shared memory under lock so that we
+	 * have a consistent view.
+ */
+ LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+ /* Find unused worker slot. */
+ for (slot = 0; slot < max_logical_replication_workers; slot++)
+ {
+ if (!LogicalRepCtx->workers[slot].proc)
+ {
+ worker = &LogicalRepCtx->workers[slot];
+ break;
+ }
+ }
+
+ /* Bail if not found */
+ if (worker == NULL)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+ errmsg("out of logical replication workers slots"),
+ errhint("You might need to increase max_logical_replication_workers.")));
+ return;
+ }
+
+ /* Prepare the worker info. */
+ memset(worker, 0, sizeof(LogicalRepWorker));
+ worker->dbid = dbid;
+ worker->userid = userid;
+ worker->subid = subid;
+
+ LWLockRelease(LogicalRepWorkerLock);
+
+ /* Register the new dynamic worker. */
+ bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
+ BGWORKER_BACKEND_DATABASE_CONNECTION;
+ bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+ bgw.bgw_main = ApplyWorkerMain;
+ snprintf(bgw.bgw_name, BGW_MAXLEN,
+ "logical replication worker for subscription %u", subid);
+
+ bgw.bgw_restart_time = BGW_NEVER_RESTART;
+ bgw.bgw_notify_pid = MyProcPid;
+ bgw.bgw_main_arg = Int32GetDatum(slot);
+
+ if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+ errmsg("out of background workers slots"),
+ errhint("You might need to increase max_worker_processes.")));
+ return;
+ }
+
+ /* Now wait until it attaches. */
+ WaitForReplicationWorkerAttach(worker, bgw_handle);
+}
+
+/*
+ * Stop the logical replication worker and wait until it detaches from the
+ * slot.
+ *
+ * The caller must hold LogicalRepLauncherLock to ensure that new workers are
+ * not being started during this function call.
+ */
+void
+logicalrep_worker_stop(Oid subid)
+{
+ LogicalRepWorker *worker;
+
+ Assert(LWLockHeldByMe(LogicalRepLauncherLock));
+
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+ worker = logicalrep_worker_find(subid);
+
+ /* No worker, nothing to do. */
+ if (!worker)
+ {
+ LWLockRelease(LogicalRepWorkerLock);
+ return;
+ }
+
+ /*
+ * If we found a worker but it does not have proc set, it is still starting
+ * up; wait for it to attach and then kill it.
+ */
+ while (worker && !worker->proc)
+ {
+ int rc;
+
+ LWLockRelease(LogicalRepWorkerLock);
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Wait for signal. */
+ rc = WaitLatch(&MyProc->procLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 1000L, WAIT_EVENT_BGWORKER_STARTUP);
+
+ /* emergency bailout if postmaster has died */
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(&MyProc->procLatch);
+
+ /* Check if the worker has started. */
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+ worker = logicalrep_worker_find(subid);
+ if (!worker || worker->proc)
+ break;
+ }
+
+ /* The worker may be gone by now; then there is nothing to terminate. */
+ if (!worker)
+ {
+ LWLockRelease(LogicalRepWorkerLock);
+ return;
+ }
+
+ /* Now terminate the worker ... */
+ kill(worker->proc->pid, SIGTERM);
+ LWLockRelease(LogicalRepWorkerLock);
+
+ /* ... and wait for it to die. */
+ for (;;)
+ {
+ int rc;
+
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+ if (!worker->proc)
+ {
+ LWLockRelease(LogicalRepWorkerLock);
+ break;
+ }
+ LWLockRelease(LogicalRepWorkerLock);
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Wait for more work. */
+ rc = WaitLatch(&MyProc->procLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ 1000L, WAIT_EVENT_BGWORKER_SHUTDOWN);
+
+ /* emergency bailout if postmaster has died */
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(&MyProc->procLatch);
+ }
+}
+
+/*
+ * Attach to a slot.
+ */
+void
+logicalrep_worker_attach(int slot)
+{
+ /* Block concurrent access. */
+ LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+ Assert(slot >= 0 && slot < max_logical_replication_workers);
+ MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
+
+ if (MyLogicalRepWorker->proc)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical replication worker slot %d already used by "
+ "another worker", slot)));
+
+ MyLogicalRepWorker->proc = MyProc;
+ before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
+
+ LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Detach the worker (cleans up the worker info).
+ */
+static void
+logicalrep_worker_detach(void)
+{
+ /* Block concurrent access. */
+ LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+ MyLogicalRepWorker->dbid = InvalidOid;
+ MyLogicalRepWorker->userid = InvalidOid;
+ MyLogicalRepWorker->subid = InvalidOid;
+ MyLogicalRepWorker->proc = NULL;
+
+ LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Cleanup function.
+ *
+ * Called on logical replication worker exit.
+ */
+static void
+logicalrep_worker_onexit(int code, Datum arg)
+{
+ logicalrep_worker_detach();
+}
+
+/* SIGTERM: set flag to exit at next convenient time */
+void
+logicalrep_worker_sigterm(SIGNAL_ARGS)
+{
+ got_SIGTERM = true;
+
+ /* Waken anything waiting on the process latch */
+ SetLatch(MyLatch);
+}
+
+/*
+ * ApplyLauncherShmemSize
+ * Compute space needed for replication launcher shared memory
+ */
+Size
+ApplyLauncherShmemSize(void)
+{
+ Size size;
+
+ /*
+ * Need the fixed struct and the array of LogicalRepWorker.
+ */
+ size = sizeof(LogicalRepCtxStruct);
+ size = MAXALIGN(size);
+ size = add_size(size, mul_size(max_logical_replication_workers,
+ sizeof(LogicalRepWorker)));
+ return size;
+}
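+
+/*
+ * A sketch of the resulting layout: for example, with
+ * max_logical_replication_workers = 4 the size is
+ * MAXALIGN(sizeof(LogicalRepCtxStruct)) + 4 * sizeof(LogicalRepWorker),
+ * i.e. the fixed struct followed by the per-worker slot array that
+ * logicalrep_worker_launch() and logicalrep_worker_find() index into.
+ */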
+
+void
+ApplyLauncherRegister(void)
+{
+ BackgroundWorker bgw;
+
+ if (max_logical_replication_workers == 0)
+ return;
+
+ bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
+ BGWORKER_BACKEND_DATABASE_CONNECTION;
+ bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+ bgw.bgw_main = ApplyLauncherMain;
+ snprintf(bgw.bgw_name, BGW_MAXLEN,
+ "logical replication launcher");
+ bgw.bgw_restart_time = 5;
+ bgw.bgw_notify_pid = 0;
+ bgw.bgw_main_arg = (Datum) 0;
+
+ RegisterBackgroundWorker(&bgw);
+}
+
+/*
+ * ApplyLauncherShmemInit
+ * Allocate and initialize replication launcher shared memory
+ */
+void
+ApplyLauncherShmemInit(void)
+{
+ bool found;
+
+ LogicalRepCtx = (LogicalRepCtxStruct *)
+ ShmemInitStruct("Logical Replication Launcher Data",
+ ApplyLauncherShmemSize(),
+ &found);
+
+ if (!found)
+ memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
+}
+
+/*
+ * Wake up the launcher on commit if requested.
+ */
+void
+AtCommit_ApplyLauncher(void)
+{
+ if (on_commit_laucher_wakeup)
+ ApplyLauncherWakeup();
+}
+
+/*
+ * Request wakeup of the launcher on commit of the transaction.
+ *
+ * This is used to signal the launcher to stop sleeping and process the
+ * subscriptions when the current transaction commits. Should be used when a
+ * new tuple was added to the pg_subscription catalog.
+ */
+void
+ApplyLauncherWakeupAtCommit(void)
+{
+ if (!on_commit_laucher_wakeup)
+ on_commit_laucher_wakeup = true;
+}
+
+void
+ApplyLauncherWakeup(void)
+{
+ if (IsBackendPid(LogicalRepCtx->launcher_pid))
+ kill(LogicalRepCtx->launcher_pid, SIGUSR1);
+}
+
+/*
+ * Main loop for the apply launcher process.
+ */
+void
+ApplyLauncherMain(Datum main_arg)
+{
+ /* Time of the last worker start, used for rate limiting in the loop. */
+ TimestampTz last_start_time = 0;
+
+ ereport(LOG,
+ (errmsg("logical replication launcher started")));
+
+ /* Establish signal handlers. */
+ pqsignal(SIGTERM, logicalrep_worker_sigterm);
+ BackgroundWorkerUnblockSignals();
+
+ /* Make it easy to identify our processes. */
+ SetConfigOption("application_name", MyBgworkerEntry->bgw_name,
+ PGC_USERSET, PGC_S_SESSION);
+
+ LogicalRepCtx->launcher_pid = MyProcPid;
+
+ /*
+ * Establish connection to nailed catalogs (we only ever access
+ * pg_subscription).
+ */
+ BackgroundWorkerInitializeConnection(NULL, NULL);
+
+ /* Enter main loop */
+ while (!got_SIGTERM)
+ {
+ int rc;
+ List *sublist;
+ ListCell *lc;
+ MemoryContext subctx;
+ MemoryContext oldctx;
+ TimestampTz now;
+ long wait_time = DEFAULT_NAPTIME_PER_CYCLE;
+
+ now = GetCurrentTimestamp();
+
+ /* Limit the start retry to once per wal_retrieve_retry_interval */
+ if (TimestampDifferenceExceeds(last_start_time, now,
+ wal_retrieve_retry_interval))
+ {
+ /* Use temporary context for the database list and worker info. */
+ subctx = AllocSetContextCreate(TopMemoryContext,
+ "Logical Replication Launcher sublist",
+ ALLOCSET_DEFAULT_SIZES);
+ oldctx = MemoryContextSwitchTo(subctx);
+
+ /* Block any concurrent DROP SUBSCRIPTION. */
+ LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
+
+ /* Search for subscriptions to start or stop. */
+ sublist = get_subscription_list();
+
+ /* Start the missing workers for enabled subscriptions. */
+ foreach(lc, sublist)
+ {
+ Subscription *sub = (Subscription *) lfirst(lc);
+ LogicalRepWorker *w;
+
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+ w = logicalrep_worker_find(sub->oid);
+ LWLockRelease(LogicalRepWorkerLock);
+
+ if (sub->enabled && w == NULL)
+ {
+ logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner);
+ last_start_time = now;
+ wait_time = wal_retrieve_retry_interval;
+ /* Limit to one worker per mainloop cycle. */
+ break;
+ }
+ }
+
+ LWLockRelease(LogicalRepLauncherLock);
+
+ /* Switch back to original memory context. */
+ MemoryContextSwitchTo(oldctx);
+ /* Clean the temporary memory. */
+ MemoryContextDelete(subctx);
+ }
+ else
+ {
+ /*
+ * The wait in the previous cycle was interrupted in less than
+ * wal_retrieve_retry_interval since the last worker was started; this
+ * usually means the worker crashed, so we should retry in
+ * wal_retrieve_retry_interval again.
+ */
+ wait_time = wal_retrieve_retry_interval;
+ }
+
+ /* Wait for more work. */
+ rc = WaitLatch(&MyProc->procLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ wait_time,
+ WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);
+
+ /* emergency bailout if postmaster has died */
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(&MyProc->procLatch);
+ }
+
+ LogicalRepCtx->launcher_pid = 0;
+
+ /* ... and if it returns, we're done */
+ ereport(LOG,
+ (errmsg("logical replication launcher shutting down")));
+
+ proc_exit(0);
+}
+
+/*
+ * Returns the state of the subscription workers.
+ */
+Datum
+pg_stat_get_subscription(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_SUBSCRIPTION_COLS 7
+ Oid subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
+ int i;
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ TupleDesc tupdesc;
+ Tuplestorestate *tupstore;
+ MemoryContext per_query_ctx;
+ MemoryContext oldcontext;
+
+ /* check to see if caller supports us returning a tuplestore */
+ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+ if (!(rsinfo->allowedModes & SFRM_Materialize))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("materialize mode required, but it is not " \
+ "allowed in this context")));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+ oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+ tupstore = tuplestore_begin_heap(true, false, work_mem);
+ rsinfo->returnMode = SFRM_Materialize;
+ rsinfo->setResult = tupstore;
+ rsinfo->setDesc = tupdesc;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Make sure we get consistent view of the workers. */
+ LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+ for (i = 0; i <= max_logical_replication_workers; i++)
+ {
+ /* for each row */
+ Datum values[PG_STAT_GET_SUBSCRIPTION_COLS];
+ bool nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
+ int worker_pid;
+ LogicalRepWorker worker;
+
+ memcpy(&worker, &LogicalRepCtx->workers[i],
+ sizeof(LogicalRepWorker));
+ if (!worker.proc || !IsBackendPid(worker.proc->pid))
+ continue;
+
+ if (OidIsValid(subid) && worker.subid != subid)
+ continue;
+
+ worker_pid = worker.proc->pid;
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ values[0] = ObjectIdGetDatum(worker.subid);
+ values[1] = Int32GetDatum(worker_pid);
+ if (XLogRecPtrIsInvalid(worker.last_lsn))
+ nulls[2] = true;
+ else
+ values[2] = LSNGetDatum(worker.last_lsn);
+ if (worker.last_send_time == 0)
+ nulls[3] = true;
+ else
+ values[3] = TimestampTzGetDatum(worker.last_send_time);
+ if (worker.last_recv_time == 0)
+ nulls[4] = true;
+ else
+ values[4] = TimestampTzGetDatum(worker.last_recv_time);
+ if (XLogRecPtrIsInvalid(worker.reply_lsn))
+ nulls[5] = true;
+ else
+ values[5] = LSNGetDatum(worker.reply_lsn);
+ if (worker.reply_time == 0)
+ nulls[6] = true;
+ else
+ values[6] = TimestampTzGetDatum(worker.reply_time);
+
+ tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+
+ /* If only a single subscription was requested, and we found it, break. */
+ if (OidIsValid(subid))
+ break;
+ }
+
+ LWLockRelease(LogicalRepWorkerLock);
+
+ /* clean up and return the tuplestore */
+ tuplestore_donestoring(tupstore);
+
+ return (Datum) 0;
+}
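+
+/*
+ * Usage sketch: passing NULL returns one row per active worker, while
+ * passing a subscription OID (16384 below is just a made-up example)
+ * returns at most one row:
+ *
+ *   SELECT * FROM pg_stat_get_subscription(NULL);
+ *   SELECT * FROM pg_stat_get_subscription(16384);
+ */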
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
new file mode 100644
index 0000000000..1f30de606a
--- /dev/null
+++ b/src/backend/replication/logical/proto.c
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * proto.c
+ * logical replication protocol functions
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/proto.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_type.h"
+#include "libpq/pqformat.h"
+#include "replication/logicalproto.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+/*
+ * Protocol message flags.
+ */
+#define LOGICALREP_IS_REPLICA_IDENTITY 1
+
+static void logicalrep_write_attrs(StringInfo out, Relation rel);
+static void logicalrep_write_tuple(StringInfo out, Relation rel,
+ HeapTuple tuple);
+
+static void logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel);
+static void logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple);
+
+static void logicalrep_write_namespace(StringInfo out, Oid nspid);
+static const char *logicalrep_read_namespace(StringInfo in);
+
+/*
+ * Write BEGIN to the output stream.
+ */
+void
+logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
+{
+ pq_sendbyte(out, 'B'); /* BEGIN */
+
+ /* fixed fields */
+ pq_sendint64(out, txn->final_lsn);
+ pq_sendint64(out, txn->commit_time);
+ pq_sendint(out, txn->xid, 4);
+}
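+
+/*
+ * For reference, a sketch of the BEGIN message as written above:
+ *
+ *   Byte1('B')  message type
+ *   Int64       final_lsn of the transaction
+ *   Int64       commit_time
+ *   Int32       xid
+ */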
+
+/*
+ * Read transaction BEGIN from the stream.
+ */
+void
+logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
+{
+ /* read fields */
+ begin_data->final_lsn = pq_getmsgint64(in);
+ if (begin_data->final_lsn == InvalidXLogRecPtr)
+ elog(ERROR, "final_lsn not set in begin message");
+ begin_data->committime = pq_getmsgint64(in);
+ begin_data->xid = pq_getmsgint(in, 4);
+}
+
+
+/*
+ * Write COMMIT to the output stream.
+ */
+void
+logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn)
+{
+ uint8 flags = 0;
+
+ pq_sendbyte(out, 'C'); /* sending COMMIT */
+
+ /* send the flags field (unused for now) */
+ pq_sendbyte(out, flags);
+
+ /* send fields */
+ pq_sendint64(out, commit_lsn);
+ pq_sendint64(out, txn->end_lsn);
+ pq_sendint64(out, txn->commit_time);
+}
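+
+/*
+ * For reference, a sketch of the COMMIT message as written above:
+ *
+ *   Byte1('C')  message type
+ *   Byte1       flags (currently unused, must be 0)
+ *   Int64       commit_lsn
+ *   Int64       end_lsn of the transaction
+ *   Int64       commit_time
+ */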
+
+/*
+ * Read transaction COMMIT from the stream.
+ */
+void
+logicalrep_read_commit(StringInfo in, LogicalRepCommitData *commit_data)
+{
+ /* read flags (unused for now) */
+ uint8 flags = pq_getmsgbyte(in);
+
+ if (flags != 0)
+ elog(ERROR, "unknown flags %u in commit message", flags);
+
+ /* read fields */
+ commit_data->commit_lsn = pq_getmsgint64(in);
+ commit_data->end_lsn = pq_getmsgint64(in);
+ commit_data->committime = pq_getmsgint64(in);
+}
+
+/*
+ * Write ORIGIN to the output stream.
+ */
+void
+logicalrep_write_origin(StringInfo out, const char *origin,
+ XLogRecPtr origin_lsn)
+{
+ pq_sendbyte(out, 'O'); /* ORIGIN */
+
+ /* fixed fields */
+ pq_sendint64(out, origin_lsn);
+
+ /* origin string */
+ pq_sendstring(out, origin);
+}
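+
+/*
+ * For reference, a sketch of the ORIGIN message as written above:
+ *
+ *   Byte1('O')  message type
+ *   Int64       origin_lsn
+ *   String      origin name (null-terminated)
+ */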
+
+/*
+ * Read ORIGIN from the stream.
+ */
+char *
+logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn)
+{
+ /* fixed fields */
+ *origin_lsn = pq_getmsgint64(in);
+
+ /* return origin */
+ return pstrdup(pq_getmsgstring(in));
+}
+
+/*
+ * Write INSERT to the output stream.
+ */
+void
+logicalrep_write_insert(StringInfo out, Relation rel, HeapTuple newtuple)
+{
+ pq_sendbyte(out, 'I'); /* action INSERT */
+
+ Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+ rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+ rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+ /* use Oid as relation identifier */
+ pq_sendint(out, RelationGetRelid(rel), 4);
+
+ pq_sendbyte(out, 'N'); /* new tuple follows */
+ logicalrep_write_tuple(out, rel, newtuple);
+}
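+
+/*
+ * For reference, a sketch of the INSERT message as written above:
+ *
+ *   Byte1('I')  message type
+ *   Int32       relation Oid
+ *   Byte1('N')  new tuple follows
+ *   TupleData   new tuple (see logicalrep_write_tuple)
+ */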
+
+/*
+ * Read INSERT from stream.
+ *
+ * Fills the new tuple.
+ */
+LogicalRepRelId
+logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup)
+{
+ char action;
+ LogicalRepRelId relid;
+
+ /* read the relation id */
+ relid = pq_getmsgint(in, 4);
+
+ action = pq_getmsgbyte(in);
+ if (action != 'N')
+ elog(ERROR, "expected new tuple but got %d",
+ action);
+
+ logicalrep_read_tuple(in, newtup);
+
+ return relid;
+}
+
+/*
+ * Write UPDATE to the output stream.
+ */
+void
+logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
+ HeapTuple newtuple)
+{
+ pq_sendbyte(out, 'U'); /* action UPDATE */
+
+ Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+ rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+ rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+ /* use Oid as relation identifier */
+ pq_sendint(out, RelationGetRelid(rel), 4);
+
+ if (oldtuple != NULL)
+ {
+ if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+ pq_sendbyte(out, 'O'); /* old tuple follows */
+ else
+ pq_sendbyte(out, 'K'); /* old key follows */
+ logicalrep_write_tuple(out, rel, oldtuple);
+ }
+
+ pq_sendbyte(out, 'N'); /* new tuple follows */
+ logicalrep_write_tuple(out, rel, newtuple);
+}
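+
+/*
+ * For reference, a sketch of the UPDATE message as written above:
+ *
+ *   Byte1('U')       message type
+ *   Int32            relation Oid
+ *   [Byte1('O'|'K')  optional: old tuple ('O') or old key ('K') follows
+ *    TupleData       old tuple or key]
+ *   Byte1('N')       new tuple follows
+ *   TupleData        new tuple
+ */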
+
+/*
+ * Read UPDATE from stream.
+ */
+LogicalRepRelId
+logicalrep_read_update(StringInfo in, bool *has_oldtuple,
+ LogicalRepTupleData *oldtup,
+ LogicalRepTupleData *newtup)
+{
+ char action;
+ LogicalRepRelId relid;
+
+ /* read the relation id */
+ relid = pq_getmsgint(in, 4);
+
+ /* read and verify action */
+ action = pq_getmsgbyte(in);
+ if (action != 'K' && action != 'O' && action != 'N')
+ elog(ERROR, "expected action 'N', 'O' or 'K', got %c",
+ action);
+
+ /* check for old tuple */
+ if (action == 'K' || action == 'O')
+ {
+ logicalrep_read_tuple(in, oldtup);
+ *has_oldtuple = true;
+
+ action = pq_getmsgbyte(in);
+ }
+ else
+ *has_oldtuple = false;
+
+ /* check for new tuple */
+ if (action != 'N')
+ elog(ERROR, "expected action 'N', got %c",
+ action);
+
+ logicalrep_read_tuple(in, newtup);
+
+ return relid;
+}
+
+/*
+ * Write DELETE to the output stream.
+ */
+void
+logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple)
+{
+ Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+ rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+ rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+ pq_sendbyte(out, 'D'); /* action DELETE */
+
+ /* use Oid as relation identifier */
+ pq_sendint(out, RelationGetRelid(rel), 4);
+
+ if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+ pq_sendbyte(out, 'O'); /* old tuple follows */
+ else
+ pq_sendbyte(out, 'K'); /* old key follows */
+
+ logicalrep_write_tuple(out, rel, oldtuple);
+}
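+
+/*
+ * For reference, a sketch of the DELETE message as written above:
+ *
+ *   Byte1('D')      message type
+ *   Int32           relation Oid
+ *   Byte1('O'|'K')  old tuple ('O') or old key ('K') follows
+ *   TupleData       old tuple or key
+ */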
+
+/*
+ * Read DELETE from stream.
+ *
+ * Fills the old tuple.
+ */
+LogicalRepRelId
+logicalrep_read_delete(StringInfo in, LogicalRepTupleData *oldtup)
+{
+ char action;
+ LogicalRepRelId relid;
+
+ /* read the relation id */
+ relid = pq_getmsgint(in, 4);
+
+ /* read and verify action */
+ action = pq_getmsgbyte(in);
+ if (action != 'K' && action != 'O')
+ elog(ERROR, "expected action 'O' or 'K', got %c", action);
+
+ logicalrep_read_tuple(in, oldtup);
+
+ return relid;
+}
+
+/*
+ * Write relation description to the output stream.
+ */
+void
+logicalrep_write_rel(StringInfo out, Relation rel)
+{
+ char *relname;
+
+ pq_sendbyte(out, 'R'); /* sending RELATION */
+
+ /* use Oid as relation identifier */
+ pq_sendint(out, RelationGetRelid(rel), 4);
+
+ /* send qualified relation name */
+ logicalrep_write_namespace(out, RelationGetNamespace(rel));
+ relname = RelationGetRelationName(rel);
+ pq_sendstring(out, relname);
+
+ /* send replica identity */
+ pq_sendbyte(out, rel->rd_rel->relreplident);
+
+ /* send the attribute info */
+ logicalrep_write_attrs(out, rel);
+}
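+
+/*
+ * For reference, a sketch of the RELATION message as written above:
+ *
+ *   Byte1('R')  message type
+ *   Int32       relation Oid
+ *   String      namespace (empty string means pg_catalog)
+ *   String      relation name
+ *   Byte1       replica identity setting
+ *   ...         attribute block (see logicalrep_write_attrs)
+ */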
+
+/*
+ * Read the relation info from stream and return as LogicalRepRelation.
+ */
+LogicalRepRelation *
+logicalrep_read_rel(StringInfo in)
+{
+ LogicalRepRelation *rel = palloc(sizeof(LogicalRepRelation));
+
+ rel->remoteid = pq_getmsgint(in, 4);
+
+ /* Read relation name from stream */
+ rel->nspname = pstrdup(logicalrep_read_namespace(in));
+ rel->relname = pstrdup(pq_getmsgstring(in));
+
+ /* Read the replica identity. */
+ rel->replident = pq_getmsgbyte(in);
+
+ /* Get attribute description */
+ logicalrep_read_attrs(in, rel);
+
+ return rel;
+}
+
+/*
+ * Write type info to the output stream.
+ *
+ * This function will always write base type info.
+ */
+void
+logicalrep_write_typ(StringInfo out, Oid typoid)
+{
+ Oid basetypoid = getBaseType(typoid);
+ HeapTuple tup;
+ Form_pg_type typtup;
+
+ pq_sendbyte(out, 'Y'); /* sending TYPE */
+
+ tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for type %u", basetypoid);
+ typtup = (Form_pg_type) GETSTRUCT(tup);
+
+ /* use Oid as relation identifier */
+ pq_sendint(out, typoid, 4);
+
+ /* send qualified type name */
+ logicalrep_write_namespace(out, typtup->typnamespace);
+ pq_sendstring(out, NameStr(typtup->typname));
+
+ ReleaseSysCache(tup);
+}
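+
+/*
+ * For reference, a sketch of the TYPE message as written above:
+ *
+ *   Byte1('Y')  message type
+ *   Int32       type Oid
+ *   String      namespace (empty string means pg_catalog)
+ *   String      type name
+ */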
+
+/*
+ * Read type info from the stream.
+ */
+void
+logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp)
+{
+ ltyp->remoteid = pq_getmsgint(in, 4);
+
+ /* Read type name from stream */
+ ltyp->nspname = pstrdup(logicalrep_read_namespace(in));
+ ltyp->typname = pstrdup(pq_getmsgstring(in));
+}
+
+/*
+ * Write a tuple to the output stream, in the most efficient format possible.
+ */
+static void
+logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
+{
+ TupleDesc desc;
+ Datum values[MaxTupleAttributeNumber];
+ bool isnull[MaxTupleAttributeNumber];
+ int i;
+ uint16 nliveatts = 0;
+
+ desc = RelationGetDescr(rel);
+
+ for (i = 0; i < desc->natts; i++)
+ {
+ if (desc->attrs[i]->attisdropped)
+ continue;
+ nliveatts++;
+ }
+ pq_sendint(out, nliveatts, 2);
+
+ /* try to allocate enough memory from the get-go */
+ enlargeStringInfo(out, tuple->t_len +
+ nliveatts * (1 + 4));
+
+ heap_deform_tuple(tuple, desc, values, isnull);
+
+ /* Write the values */
+ for (i = 0; i < desc->natts; i++)
+ {
+ HeapTuple typtup;
+ Form_pg_type typclass;
+ Form_pg_attribute att = desc->attrs[i];
+ char *outputstr;
+ int len;
+
+ /* skip dropped columns */
+ if (att->attisdropped)
+ continue;
+
+ if (isnull[i])
+ {
+ pq_sendbyte(out, 'n'); /* null column */
+ continue;
+ }
+ else if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
+ {
+ pq_sendbyte(out, 'u'); /* unchanged toast column */
+ continue;
+ }
+
+ typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(att->atttypid));
+ if (!HeapTupleIsValid(typtup))
+ elog(ERROR, "cache lookup failed for type %u", att->atttypid);
+ typclass = (Form_pg_type) GETSTRUCT(typtup);
+
+ pq_sendbyte(out, 't'); /* 'text' data follows */
+
+ outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]);
+ len = strlen(outputstr) + 1; /* null terminated */
+ pq_sendint(out, len, 4); /* length */
+ appendBinaryStringInfo(out, outputstr, len); /* data */
+
+ pfree(outputstr);
+
+ ReleaseSysCache(typtup);
+ }
+}
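+
+/*
+ * For reference, a sketch of the TupleData block as written above:
+ *
+ *   Int16  number of live (non-dropped) columns
+ *
+ * followed, for each such column, by one of:
+ *
+ *   Byte1('n')  null value
+ *   Byte1('u')  unchanged toasted value (data not sent)
+ *   Byte1('t')  text-format value, followed by
+ *     Int32     length (including the terminating zero byte)
+ *     ByteN     value in text format, zero-terminated
+ */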
+
+/*
+ * Read tuple in remote format from stream.
+ *
+ * The returned tuple points into the input stringinfo.
+ */
+static void
+logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
+{
+ int i;
+ int natts;
+
+ /* Get number of attributes. */
+ natts = pq_getmsgint(in, 2);
+
+ memset(tuple->changed, 0, sizeof(tuple->changed));
+
+ /* Read the data */
+ for (i = 0; i < natts; i++)
+ {
+ char kind;
+ int len;
+
+ kind = pq_getmsgbyte(in);
+
+ switch (kind)
+ {
+ case 'n': /* null */
+ tuple->values[i] = NULL;
+ tuple->changed[i] = true;
+ break;
+ case 'u': /* unchanged column */
+ tuple->values[i] = (char *) 0xdeadbeef; /* make bad usage more obvious */
+ break;
+ case 't': /* text formatted value */
+ {
+ tuple->changed[i] = true;
+
+ len = pq_getmsgint(in, 4); /* read length */
+
+ /* and data */
+ tuple->values[i] = (char *) pq_getmsgbytes(in, len);
+ }
+ break;
+ default:
+ elog(ERROR, "unknown data representation type '%c'", kind);
+ }
+ }
+}
+
+/*
+ * Write relation attributes to the stream.
+ */
+static void
+logicalrep_write_attrs(StringInfo out, Relation rel)
+{
+ TupleDesc desc;
+ int i;
+ uint16 nliveatts = 0;
+ Bitmapset *idattrs = NULL;
+ bool replidentfull;
+
+ desc = RelationGetDescr(rel);
+
+ /* send number of live attributes */
+ for (i = 0; i < desc->natts; i++)
+ {
+ if (desc->attrs[i]->attisdropped)
+ continue;
+ nliveatts++;
+ }
+ pq_sendint(out, nliveatts, 2);
+
+ /* fetch bitmap of REPLICATION IDENTITY attributes */
+ replidentfull = (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL);
+ if (!replidentfull)
+ idattrs = RelationGetIndexAttrBitmap(rel,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+ /* send the attributes */
+ for (i = 0; i < desc->natts; i++)
+ {
+ Form_pg_attribute att = desc->attrs[i];
+ uint8 flags = 0;
+
+ if (att->attisdropped)
+ continue;
+
+ /* REPLICA IDENTITY FULL means all columns are sent as part of key. */
+ if (replidentfull ||
+ bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
+ idattrs))
+ flags |= LOGICALREP_IS_REPLICA_IDENTITY;
+
+ pq_sendbyte(out, flags);
+
+ /* attribute name */
+ pq_sendstring(out, NameStr(att->attname));
+
+ /* attribute type id */
+ pq_sendint(out, (int) att->atttypid, sizeof(att->atttypid));
+
+ /* attribute mode */
+ pq_sendint(out, att->atttypmod, sizeof(att->atttypmod));
+ }
+
+ bms_free(idattrs);
+}
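+
+/*
+ * For reference, a sketch of the attribute block as written above:
+ *
+ *   Int16  number of live (non-dropped) attributes
+ *
+ * followed, for each such attribute, by:
+ *
+ *   Byte1   flags (LOGICALREP_IS_REPLICA_IDENTITY if part of the key)
+ *   String  attribute name
+ *   Int32   attribute type Oid
+ *   Int32   attribute typmod
+ */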
+
+/*
+ * Read relation attribute names from the stream.
+ */
+static void
+logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel)
+{
+ int i;
+ int natts;
+ char **attnames;
+ Oid *atttyps;
+ Bitmapset *attkeys = NULL;
+
+ natts = pq_getmsgint(in, 2);
+ attnames = palloc(natts * sizeof(char *));
+ atttyps = palloc(natts * sizeof(Oid));
+
+ /* read the attributes */
+ for (i = 0; i < natts; i++)
+ {
+ uint8 flags;
+
+ /* Check for replica identity column */
+ flags = pq_getmsgbyte(in);
+ if (flags & LOGICALREP_IS_REPLICA_IDENTITY)
+ attkeys = bms_add_member(attkeys, i);
+
+ /* attribute name */
+ attnames[i] = pstrdup(pq_getmsgstring(in));
+
+ /* attribute type id */
+ atttyps[i] = (Oid) pq_getmsgint(in, 4);
+
+ /* we ignore attribute mode for now */
+ (void) pq_getmsgint(in, 4);
+ }
+
+ rel->attnames = attnames;
+ rel->atttyps = atttyps;
+ rel->attkeys = attkeys;
+ rel->natts = natts;
+}
+
+/*
+ * Write the namespace name or empty string for pg_catalog (to save space).
+ */
+static void
+logicalrep_write_namespace(StringInfo out, Oid nspid)
+{
+ if (nspid == PG_CATALOG_NAMESPACE)
+ pq_sendbyte(out, '\0');
+ else
+ {
+ char *nspname = get_namespace_name(nspid);
+
+ if (nspname == NULL)
+ elog(ERROR, "cache lookup failed for namespace %u",
+ nspid);
+
+ pq_sendstring(out, nspname);
+ }
+}
+
+/*
+ * Read the namespace name while treating empty string as pg_catalog.
+ */
+static const char *
+logicalrep_read_namespace(StringInfo in)
+{
+ const char *nspname = pq_getmsgstring(in);
+
+ if (nspname[0] == '\0')
+ nspname = "pg_catalog";
+
+ return nspname;
+}
diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
new file mode 100644
index 0000000000..383c6ebe76
--- /dev/null
+++ b/src/backend/replication/logical/relation.c
@@ -0,0 +1,489 @@
+/*-------------------------------------------------------------------------
+ * relation.c
+ * PostgreSQL logical replication
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/relation.c
+ *
+ * NOTES
+ * This file contains helper functions for logical replication relation
+ * mapping cache.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/sysattr.h"
+#include "catalog/namespace.h"
+#include "nodes/makefuncs.h"
+#include "replication/logicalrelation.h"
+#include "replication/worker_internal.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+static MemoryContext LogicalRepRelMapContext = NULL;
+
+static HTAB *LogicalRepRelMap = NULL;
+static HTAB *LogicalRepTypMap = NULL;
+
+static void logicalrep_typmap_invalidate_cb(Datum arg, int cacheid,
+ uint32 hashvalue);
+
+/*
+ * Relcache invalidation callback for our relation map cache.
+ */
+static void
+logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
+{
+ LogicalRepRelMapEntry *entry;
+
+ /* Just to be sure. */
+ if (LogicalRepRelMap == NULL)
+ return;
+
+ if (reloid != InvalidOid)
+ {
+ HASH_SEQ_STATUS status;
+
+ hash_seq_init(&status, LogicalRepRelMap);
+
+ /* TODO, use inverse lookup hashtable? */
+ while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
+ {
+ if (entry->localreloid == reloid)
+ {
+ entry->localreloid = InvalidOid;
+ hash_seq_term(&status);
+ break;
+ }
+ }
+ }
+ else
+ {
+ /* invalidate all cache entries */
+ HASH_SEQ_STATUS status;
+
+ hash_seq_init(&status, LogicalRepRelMap);
+
+ while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
+ entry->localreloid = InvalidOid;
+ }
+}
+
+/*
+ * Initialize the relation map cache.
+ */
+static void
+logicalrep_relmap_init(void)
+{
+ HASHCTL ctl;
+
+ if (!LogicalRepRelMapContext)
+ LogicalRepRelMapContext =
+ AllocSetContextCreate(CacheMemoryContext,
+ "LogicalRepRelMapContext",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /* Initialize the relation hash table. */
+ MemSet(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(LogicalRepRelId);
+ ctl.entrysize = sizeof(LogicalRepRelMapEntry);
+ ctl.hcxt = LogicalRepRelMapContext;
+
+ LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
+ HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+ /* Initialize the type hash table. */
+ MemSet(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(LogicalRepTyp);
+ ctl.hcxt = LogicalRepRelMapContext;
+
+ /* This will usually be small. */
+ LogicalRepTypMap = hash_create("logicalrep type map cache", 2, &ctl,
+ HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+ /* Watch for invalidation events. */
+ CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
+ (Datum) 0);
+ CacheRegisterSyscacheCallback(TYPEOID, logicalrep_typmap_invalidate_cb,
+ (Datum) 0);
+}
+
+/*
+ * Free the entry of a relation map cache.
+ */
+static void
+logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry)
+{
+ LogicalRepRelation *remoterel;
+
+ remoterel = &entry->remoterel;
+
+ pfree(remoterel->nspname);
+ pfree(remoterel->relname);
+
+ if (remoterel->natts > 0)
+ {
+ int i;
+
+ for (i = 0; i < remoterel->natts; i++)
+ pfree(remoterel->attnames[i]);
+
+ pfree(remoterel->attnames);
+ pfree(remoterel->atttyps);
+ }
+ remoterel->attnames = NULL;
+ remoterel->atttyps = NULL;
+
+ bms_free(remoterel->attkeys);
+ remoterel->attkeys = NULL;
+
+ if (entry->attrmap)
+ pfree(entry->attrmap);
+
+ entry->attrmap = NULL;
+ remoterel->natts = 0;
+ entry->localreloid = InvalidOid;
+ entry->localrel = NULL;
+}
+
+/*
+ * Add new entry or update existing entry in the relation map cache.
+ *
+ * Called when new relation mapping is sent by the publisher to update
+ * our expected view of incoming data from said publisher.
+ */
+void
+logicalrep_relmap_update(LogicalRepRelation *remoterel)
+{
+ MemoryContext oldctx;
+ LogicalRepRelMapEntry *entry;
+ bool found;
+ int i;
+
+ if (LogicalRepRelMap == NULL)
+ logicalrep_relmap_init();
+
+ /*
+ * HASH_ENTER returns the existing entry if present or creates a new one.
+ */
+ entry = hash_search(LogicalRepRelMap, (void *) &remoterel->remoteid,
+ HASH_ENTER, &found);
+
+ if (found)
+ logicalrep_relmap_free_entry(entry);
+
+ /* Make cached copy of the data */
+ oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+ entry->remoterel.remoteid = remoterel->remoteid;
+ entry->remoterel.nspname = pstrdup(remoterel->nspname);
+ entry->remoterel.relname = pstrdup(remoterel->relname);
+ entry->remoterel.natts = remoterel->natts;
+ entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *));
+ entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid));
+ for (i = 0; i < remoterel->natts; i++)
+ {
+ entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]);
+ entry->remoterel.atttyps[i] = remoterel->atttyps[i];
+ }
+ entry->remoterel.replident = remoterel->replident;
+ entry->remoterel.attkeys = bms_copy(remoterel->attkeys);
+ entry->attrmap = NULL;
+ entry->localreloid = InvalidOid;
+ MemoryContextSwitchTo(oldctx);
+}
+
+/*
+ * Find attribute index in TupleDesc struct by attribute name.
+ *
+ * Returns -1 if not found.
+ */
+static int
+logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname)
+{
+ int i;
+
+ for (i = 0; i < remoterel->natts; i++)
+ {
+ if (strcmp(remoterel->attnames[i], attname) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+/*
+ * Open the local relation associated with the remote one.
+ *
+ * Rebuilds the cached mapping if it was invalidated by local DDL.
+ */
+LogicalRepRelMapEntry *
+logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
+{
+ LogicalRepRelMapEntry *entry;
+ bool found;
+
+ if (LogicalRepRelMap == NULL)
+ logicalrep_relmap_init();
+
+ /* Search for existing entry. */
+ entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
+ HASH_FIND, &found);
+
+ if (!found)
+ elog(ERROR, "no relation map entry for remote relation ID %u",
+ remoteid);
+
+ /* Need to update the local cache? */
+ if (!OidIsValid(entry->localreloid))
+ {
+ Oid relid;
+ int i;
+ int nfound;
+ Bitmapset *idkey;
+ TupleDesc desc;
+ LogicalRepRelation *remoterel;
+ MemoryContext oldctx;
+
+ remoterel = &entry->remoterel;
+
+ /* Try to find and lock the relation by name. */
+ relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
+ remoterel->relname, -1),
+ lockmode, true);
+ if (!OidIsValid(relid))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical replication target relation \"%s.%s\" does not exist",
+ remoterel->nspname, remoterel->relname)));
+ entry->localrel = heap_open(relid, NoLock);
+
+ /* We currently only support writing to regular tables. */
+ if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("logical replication target relation \"%s.%s\" is not a table",
+ remoterel->nspname, remoterel->relname)));
+
+ /*
+ * Build the mapping of local attribute numbers to remote attribute
+ * numbers and validate that we don't miss any replicated columns
+ * as that would result in potentially unwanted data loss.
+ */
+ desc = RelationGetDescr(entry->localrel);
+ oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+ entry->attrmap = palloc(desc->natts * sizeof(int));
+ MemoryContextSwitchTo(oldctx);
+
+ nfound = 0;
+ for (i = 0; i < desc->natts; i++)
+ {
+ int attnum = logicalrep_rel_att_by_name(remoterel,
+ NameStr(desc->attrs[i]->attname));
+
+ entry->attrmap[i] = attnum;
+ if (attnum >= 0)
+ nfound++;
+ }
+
+ /* TODO, detail message with names of missing columns */
+ if (nfound < remoterel->natts)
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical replication target relation \"%s.%s\" is missing "
+ "some replicated columns",
+ remoterel->nspname, remoterel->relname)));
+
+ /*
+ * Check that the replica identity matches. We allow for a stricter
+ * replica identity (fewer columns) on the subscriber, as that will not
+ * stop us from finding a unique tuple. I.e., if the publisher has
+ * identity (id, timestamp) and the subscriber just (id), this will not
+ * be a problem, but the opposite scenario will.
+ *
+ * Don't throw an error here; just mark the relation entry as not
+ * updatable, as replica identity is used only for updates and deletes,
+ * but inserts can be replicated even without it.
+ */
+ entry->updatable = true;
+ idkey = RelationGetIndexAttrBitmap(entry->localrel,
+ INDEX_ATTR_BITMAP_IDENTITY_KEY);
+ /* fallback to PK if no replica identity */
+ if (idkey == NULL)
+ {
+ idkey = RelationGetIndexAttrBitmap(entry->localrel,
+ INDEX_ATTR_BITMAP_PRIMARY_KEY);
+ /*
+ * If no replica identity index and no PK, the published table
+ * must have replica identity FULL.
+ */
+ if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
+ entry->updatable = false;
+ }
+
+ i = -1;
+ while ((i = bms_next_member(idkey, i)) >= 0)
+ {
+ int attnum = i + FirstLowInvalidHeapAttributeNumber;
+
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical replication target relation \"%s.%s\" uses "
+ "system columns in REPLICA IDENTITY index",
+ remoterel->nspname, remoterel->relname)));
+
+ attnum = AttrNumberGetAttrOffset(attnum);
+
+ if (!bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
+ {
+ entry->updatable = false;
+ break;
+ }
+ }
+
+ entry->localreloid = relid;
+ }
+ else
+ entry->localrel = heap_open(entry->localreloid, lockmode);
+
+ return entry;
+}
+
+/*
+ * Close the previously opened logical relation.
+ */
+void
+logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode)
+{
+ heap_close(rel->localrel, lockmode);
+ rel->localrel = NULL;
+}
+
+
+/*
+ * Type cache invalidation callback for our type map cache.
+ */
+static void
+logicalrep_typmap_invalidate_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+ HASH_SEQ_STATUS status;
+ LogicalRepTyp *entry;
+
+ /* Just to be sure. */
+ if (LogicalRepTypMap == NULL)
+ return;
+
+ /* invalidate all cache entries */
+ hash_seq_init(&status, LogicalRepTypMap);
+
+ while ((entry = (LogicalRepTyp *) hash_seq_search(&status)) != NULL)
+ entry->typoid = InvalidOid;
+}
+
+/*
+ * Free the type map cache entry data.
+ */
+static void
+logicalrep_typmap_free_entry(LogicalRepTyp *entry)
+{
+ pfree(entry->nspname);
+ pfree(entry->typname);
+
+ entry->typoid = InvalidOid;
+}
+
+/*
+ * Add new entry or update existing entry in the type map cache.
+ */
+void
+logicalrep_typmap_update(LogicalRepTyp *remotetyp)
+{
+ MemoryContext oldctx;
+ LogicalRepTyp *entry;
+ bool found;
+
+ if (LogicalRepTypMap == NULL)
+ logicalrep_relmap_init();
+
+ /*
+ * HASH_ENTER returns the existing entry if present or creates a new one.
+ */
+ entry = hash_search(LogicalRepTypMap, (void *) &remotetyp->remoteid,
+ HASH_ENTER, &found);
+
+ if (found)
+ logicalrep_typmap_free_entry(entry);
+
+ /* Make cached copy of the data */
+ entry->remoteid = remotetyp->remoteid;
+ oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+ entry->nspname = pstrdup(remotetyp->nspname);
+ entry->typname = pstrdup(remotetyp->typname);
+ MemoryContextSwitchTo(oldctx);
+ entry->typoid = InvalidOid;
+}
+
+/*
+ * Fetch type info from the cache.
+ */
+Oid
+logicalrep_typmap_getid(Oid remoteid)
+{
+ LogicalRepTyp *entry;
+ bool found;
+ Oid nspoid;
+
+ /* Internal types are mapped directly. */
+ if (remoteid < FirstNormalObjectId)
+ {
+ if (!get_typisdefined(remoteid))
+ ereport(ERROR,
+ (errmsg("builtin type %u not found", remoteid),
+ errhint("This can be caused by having publisher with "
+ "higher major version than subscriber")));
+ return remoteid;
+ }
+
+ if (LogicalRepTypMap == NULL)
+ logicalrep_relmap_init();
+
+ /* Try finding the mapping. */
+ entry = hash_search(LogicalRepTypMap, (void *) &remoteid,
+ HASH_FIND, &found);
+
+ if (!found)
+ elog(ERROR, "no type map entry for remote type %u",
+ remoteid);
+
+ /* Found and mapped, return the oid. */
+ if (OidIsValid(entry->typoid))
+ return entry->typoid;
+
+ /* Otherwise, try to map to local type. */
+ nspoid = LookupExplicitNamespace(entry->nspname, true);
+ if (OidIsValid(nspoid))
+ entry->typoid = GetSysCacheOid2(TYPENAMENSP,
+ PointerGetDatum(entry->typname),
+ ObjectIdGetDatum(nspoid));
+ else
+ entry->typoid = InvalidOid;
+
+ if (!OidIsValid(entry->typoid))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("data type \"%s.%s\" required for logical replication does not exist",
+ entry->nspname, entry->typname)));
+
+ return entry->typoid;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
new file mode 100644
index 0000000000..7d86736444
--- /dev/null
+++ b/src/backend/replication/logical/worker.c
@@ -0,0 +1,1429 @@
+/*-------------------------------------------------------------------------
+ * worker.c
+ * PostgreSQL logical replication worker (apply)
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/logical/worker.c
+ *
+ * NOTES
+ * This file contains the worker which applies logical changes as they come
+ * from remote logical replication stream.
+ *
+ * The main worker (apply) is started by logical replication worker
+ * launcher for every enabled subscription in a database. It uses
+ * walsender protocol to communicate with publisher.
+ *
+ * The apply worker may spawn additional workers (sync) for initial data
+ * synchronization of tables.
+ *
+ * This module includes server facing code and shares libpqwalreceiver
+ * module with walreceiver for providing the libpq specific functionality.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "funcapi.h"
+
+#include "access/xact.h"
+#include "access/xlog_internal.h"
+
+#include "catalog/namespace.h"
+#include "catalog/pg_subscription.h"
+
+#include "commands/trigger.h"
+
+#include "executor/executor.h"
+#include "executor/nodeModifyTable.h"
+
+#include "libpq/pqformat.h"
+#include "libpq/pqsignal.h"
+
+#include "mb/pg_wchar.h"
+
+#include "nodes/makefuncs.h"
+
+#include "optimizer/planner.h"
+
+#include "parser/parse_relation.h"
+
+#include "postmaster/bgworker.h"
+#include "postmaster/postmaster.h"
+
+#include "replication/decode.h"
+#include "replication/logical.h"
+#include "replication/logicalproto.h"
+#include "replication/logicalrelation.h"
+#include "replication/logicalworker.h"
+#include "replication/reorderbuffer.h"
+#include "replication/origin.h"
+#include "replication/snapbuild.h"
+#include "replication/walreceiver.h"
+#include "replication/worker_internal.h"
+
+#include "rewrite/rewriteHandler.h"
+
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"
+#include "storage/lmgr.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/timeout.h"
+#include "utils/tqual.h"
+#include "utils/syscache.h"
+
+#define NAPTIME_PER_CYCLE 1000 /* max sleep time between cycles (1s) */
+
+typedef struct FlushPosition
+{
+ dlist_node node;
+ XLogRecPtr local_end;
+ XLogRecPtr remote_end;
+} FlushPosition;
+
+static dlist_head lsn_mapping = DLIST_STATIC_INIT(lsn_mapping);
+
+typedef struct SlotErrCallbackArg
+{
+ LogicalRepRelation *rel;
+ int attnum;
+} SlotErrCallbackArg;
+
+static MemoryContext ApplyContext = NULL;
+static MemoryContext ApplyCacheContext = NULL;
+
+WalReceiverConn *wrconn = NULL;
+
+Subscription *MySubscription = NULL;
+bool MySubscriptionValid = false;
+
+bool in_remote_transaction = false;
+
+static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
+
+static void store_flush_position(XLogRecPtr remote_lsn);
+
+static void reread_subscription(void);
+
+/*
+ * Make sure that we have started a local transaction.
+ *
+ * Also switches to ApplyContext as necessary.
+ */
+static bool
+ensure_transaction(void)
+{
+ if (IsTransactionState())
+ {
+ if (CurrentMemoryContext != ApplyContext)
+ MemoryContextSwitchTo(ApplyContext);
+ return false;
+ }
+
+ StartTransactionCommand();
+
+ if (!MySubscriptionValid)
+ reread_subscription();
+
+ MemoryContextSwitchTo(ApplyContext);
+ return true;
+}
+
+
+/*
+ * Executor state preparation for evaluation of constraint expressions,
+ * indexes and triggers.
+ *
+ * This is based on similar code in copy.c
+ */
+static EState *
+create_estate_for_relation(LogicalRepRelMapEntry *rel)
+{
+ EState *estate;
+ ResultRelInfo *resultRelInfo;
+ RangeTblEntry *rte;
+
+ estate = CreateExecutorState();
+
+ rte = makeNode(RangeTblEntry);
+ rte->rtekind = RTE_RELATION;
+ rte->relid = RelationGetRelid(rel->localrel);
+ rte->relkind = rel->localrel->rd_rel->relkind;
+ estate->es_range_table = list_make1(rte);
+
+ resultRelInfo = makeNode(ResultRelInfo);
+ InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
+
+ estate->es_result_relations = resultRelInfo;
+ estate->es_num_result_relations = 1;
+ estate->es_result_relation_info = resultRelInfo;
+
+ /* Triggers might need a slot */
+ if (resultRelInfo->ri_TrigDesc)
+ estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
+
+ return estate;
+}
+
+/*
+ * Evaluates default values for columns for which we received no data from
+ * the remote relation.
+ *
+ * This allows us to support tables which have more columns on the
+ * downstream (subscriber) than on the upstream (publisher).
+ */
+static void
+slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate,
+ TupleTableSlot *slot)
+{
+ TupleDesc desc = RelationGetDescr(rel->localrel);
+ int num_phys_attrs = desc->natts;
+ int i;
+ int attnum,
+ num_defaults = 0;
+ int *defmap;
+ ExprState **defexprs;
+ ExprContext *econtext;
+
+ econtext = GetPerTupleExprContext(estate);
+
+ /* We got all the data via replication, no need to evaluate anything. */
+ if (num_phys_attrs == rel->remoterel.natts)
+ return;
+
+ defmap = (int *) palloc(num_phys_attrs * sizeof(int));
+ defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
+
+ for (attnum = 0; attnum < num_phys_attrs; attnum++)
+ {
+ Expr *defexpr;
+
+ if (desc->attrs[attnum]->attisdropped)
+ continue;
+
+ if (rel->attrmap[attnum] >= 0)
+ continue;
+
+ defexpr = (Expr *) build_column_default(rel->localrel, attnum + 1);
+
+ if (defexpr != NULL)
+ {
+ /* Run the expression through planner */
+ defexpr = expression_planner(defexpr);
+
+ /* Initialize executable expression */
+ defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
+ defmap[num_defaults] = attnum;
+ num_defaults++;
+ }
+
+ }
+
+ for (i = 0; i < num_defaults; i++)
+ slot->tts_values[defmap[i]] =
+ ExecEvalExpr(defexprs[i], econtext, &slot->tts_isnull[defmap[i]]);
+}
+
+/*
+ * Error callback to give more context info about type conversion failure.
+ */
+static void
+slot_store_error_callback(void *arg)
+{
+ SlotErrCallbackArg *errarg = (SlotErrCallbackArg *) arg;
+ Oid remotetypoid,
+ localtypoid;
+
+ if (errarg->attnum < 0)
+ return;
+
+ remotetypoid = errarg->rel->atttyps[errarg->attnum];
+ localtypoid = logicalrep_typmap_getid(remotetypoid);
+ errcontext("processing remote data for replication target relation \"%s.%s\" column \"%s\", "
+ "remote type %s, local type %s",
+ errarg->rel->nspname, errarg->rel->relname,
+ errarg->rel->attnames[errarg->attnum],
+ format_type_be(remotetypoid),
+ format_type_be(localtypoid));
+}
+
+/*
+ * Store tuple data, given in C string form, into the slot.
+ *
+ * This is similar to BuildTupleFromCStrings, but a TupleTableSlot fits our
+ * use better.
+ */
+static void
+slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
+ char **values)
+{
+ int natts = slot->tts_tupleDescriptor->natts;
+ int i;
+ SlotErrCallbackArg errarg;
+ ErrorContextCallback errcallback;
+
+ ExecClearTuple(slot);
+
+ /* Push callback + info on the error context stack */
+ errarg.rel = &rel->remoterel;
+ errarg.attnum = -1;
+ errcallback.callback = slot_store_error_callback;
+ errcallback.arg = (void *) &errarg;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* Call the "in" function for each non-dropped attribute */
+ for (i = 0; i < natts; i++)
+ {
+ Form_pg_attribute att = slot->tts_tupleDescriptor->attrs[i];
+ int remoteattnum = rel->attrmap[i];
+
+ if (!att->attisdropped && remoteattnum >= 0 &&
+ values[remoteattnum] != NULL)
+ {
+ Oid typinput;
+ Oid typioparam;
+
+ errarg.attnum = remoteattnum;
+
+ getTypeInputInfo(att->atttypid, &typinput, &typioparam);
+ slot->tts_values[i] = OidInputFunctionCall(typinput,
+ values[remoteattnum],
+ typioparam,
+ att->atttypmod);
+ slot->tts_isnull[i] = false;
+ }
+ else
+ {
+ /*
+ * We assign NULL to dropped attributes, NULL values, and missing
+ * values (missing values should be later filled using
+ * slot_fill_defaults).
+ */
+ slot->tts_values[i] = (Datum) 0;
+ slot->tts_isnull[i] = true;
+ }
+ }
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+
+ ExecStoreVirtualTuple(slot);
+}
+
+/*
+ * Modify the slot with user data provided as C strings.
+ *
+ * This is somewhat similar to heap_modify_tuple, but it also calls the type
+ * input function on the user data, as the input is the text representation
+ * of the types.
+ */
+static void
+slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel,
+ char **values, bool *replaces)
+{
+ int natts = slot->tts_tupleDescriptor->natts;
+ int i;
+ SlotErrCallbackArg errarg;
+ ErrorContextCallback errcallback;
+
+ slot_getallattrs(slot);
+ ExecClearTuple(slot);
+
+ /* Push callback + info on the error context stack */
+ errarg.rel = &rel->remoterel;
+ errarg.attnum = -1;
+ errcallback.callback = slot_store_error_callback;
+ errcallback.arg = (void *) &errarg;
+ errcallback.previous = error_context_stack;
+ error_context_stack = &errcallback;
+
+ /* Call the "in" function for each replaced attribute */
+ for (i = 0; i < natts; i++)
+ {
+ Form_pg_attribute att = slot->tts_tupleDescriptor->attrs[i];
+ int remoteattnum = rel->attrmap[i];
+
+ if (remoteattnum >= 0 && !replaces[remoteattnum])
+ continue;
+
+ if (remoteattnum >= 0 && values[remoteattnum] != NULL)
+ {
+ Oid typinput;
+ Oid typioparam;
+
+ errarg.attnum = remoteattnum;
+
+ getTypeInputInfo(att->atttypid, &typinput, &typioparam);
+ slot->tts_values[i] = OidInputFunctionCall(typinput, values[i],
+ typioparam,
+ att->atttypmod);
+ slot->tts_isnull[i] = false;
+ }
+ else
+ {
+ slot->tts_values[i] = (Datum) 0;
+ slot->tts_isnull[i] = true;
+ }
+ }
+
+ /* Pop the error context stack */
+ error_context_stack = errcallback.previous;
+
+ ExecStoreVirtualTuple(slot);
+}
+
+/*
+ * Handle BEGIN message.
+ */
+static void
+apply_handle_begin(StringInfo s)
+{
+ LogicalRepBeginData begin_data;
+
+ logicalrep_read_begin(s, &begin_data);
+
+ replorigin_session_origin_timestamp = begin_data.committime;
+ replorigin_session_origin_lsn = begin_data.final_lsn;
+
+ in_remote_transaction = true;
+
+ pgstat_report_activity(STATE_RUNNING, NULL);
+}
+
+/*
+ * Handle COMMIT message.
+ *
+ * TODO, support tracking of multiple origins
+ */
+static void
+apply_handle_commit(StringInfo s)
+{
+ LogicalRepCommitData commit_data;
+
+ logicalrep_read_commit(s, &commit_data);
+
+ Assert(commit_data.commit_lsn == replorigin_session_origin_lsn);
+ Assert(commit_data.committime == replorigin_session_origin_timestamp);
+
+ if (IsTransactionState())
+ {
+ CommitTransactionCommand();
+
+ store_flush_position(commit_data.end_lsn);
+ }
+
+ in_remote_transaction = false;
+
+ pgstat_report_activity(STATE_IDLE, NULL);
+}
+
+/*
+ * Handle ORIGIN message.
+ *
+ * TODO, support tracking of multiple origins
+ */
+static void
+apply_handle_origin(StringInfo s)
+{
+ /*
+ * The ORIGIN message can only come inside a remote transaction and before
+ * any actual writes.
+ */
+ if (!in_remote_transaction || IsTransactionState())
+ ereport(ERROR,
+ (errcode(ERRCODE_PROTOCOL_VIOLATION),
+ errmsg("ORIGIN message sent out of order")));
+}
+
+/*
+ * Handle RELATION message.
+ *
+ * Note we don't do validation against the local schema here. That is
+ * postponed until the first change for the given relation arrives, as we
+ * only care about it when applying changes for it anyway, and this way we
+ * do less locking.
+ */
+static void
+apply_handle_relation(StringInfo s)
+{
+ LogicalRepRelation *rel;
+
+ rel = logicalrep_read_rel(s);
+ logicalrep_relmap_update(rel);
+}
+
+/*
+ * Handle TYPE message.
+ *
+ * Note we don't do local mapping here, that's done when the type is
+ * actually used.
+ */
+static void
+apply_handle_type(StringInfo s)
+{
+ LogicalRepTyp typ;
+
+ logicalrep_read_typ(s, &typ);
+ logicalrep_typmap_update(&typ);
+}
+
+/*
+ * Get the replica identity index, or if that is not defined, the primary
+ * key index.
+ *
+ * If neither is defined, returns InvalidOid.
+ */
+static Oid
+GetRelationIdentityOrPK(Relation rel)
+{
+ Oid idxoid;
+
+ idxoid = RelationGetReplicaIndex(rel);
+
+ if (!OidIsValid(idxoid))
+ idxoid = RelationGetPrimaryKeyIndex(rel);
+
+ return idxoid;
+}
+
+/*
+ * Handle INSERT message.
+ */
+static void
+apply_handle_insert(StringInfo s)
+{
+ LogicalRepRelMapEntry *rel;
+ LogicalRepTupleData newtup;
+ LogicalRepRelId relid;
+ EState *estate;
+ TupleTableSlot *remoteslot;
+ MemoryContext oldctx;
+
+ ensure_transaction();
+
+ relid = logicalrep_read_insert(s, &newtup);
+ rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+ /* Initialize the executor state. */
+ estate = create_estate_for_relation(rel);
+ remoteslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+
+ /* Process and store remote tuple in the slot */
+ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ slot_store_cstrings(remoteslot, rel, newtup.values);
+ slot_fill_defaults(rel, estate, remoteslot);
+ MemoryContextSwitchTo(oldctx);
+
+ PushActiveSnapshot(GetTransactionSnapshot());
+ ExecOpenIndices(estate->es_result_relation_info, false);
+
+ /* Do the insert. */
+ ExecSimpleRelationInsert(estate, remoteslot);
+
+ /* Cleanup. */
+ ExecCloseIndices(estate->es_result_relation_info);
+ PopActiveSnapshot();
+ ExecResetTupleTable(estate->es_tupleTable, false);
+ FreeExecutorState(estate);
+
+ logicalrep_rel_close(rel, NoLock);
+
+ CommandCounterIncrement();
+}
+
+/*
+ * Check if the logical replication relation is updatable and throw
+ * appropriate error if it isn't.
+ */
+static void
+check_relation_updatable(LogicalRepRelMapEntry *rel)
+{
+ /* Updatable, no error. */
+ if (rel->updatable)
+ return;
+
+ /*
+ * We are in error mode, so it's fine that this is somewhat slow. It's
+ * better to give the user a correct error.
+ */
+ if (OidIsValid(GetRelationIdentityOrPK(rel->localrel)))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("publisher does not send replica identity column "
+ "expected by the logical replication target relation \"%s.%s\"",
+ rel->remoterel.nspname, rel->remoterel.relname)));
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("logical replication target relation \"%s.%s\" has "
+ "neither REPLICA IDENTIY index nor PRIMARY "
+ "KEY and published relation does not have "
+ "REPLICA IDENTITY FULL",
+ rel->remoterel.nspname, rel->remoterel.relname)));
+}
+
+/*
+ * Handle UPDATE message.
+ *
+ * TODO: FDW support
+ */
+static void
+apply_handle_update(StringInfo s)
+{
+ LogicalRepRelMapEntry *rel;
+ LogicalRepRelId relid;
+ Oid idxoid;
+ EState *estate;
+ EPQState epqstate;
+ LogicalRepTupleData oldtup;
+ LogicalRepTupleData newtup;
+ bool has_oldtup;
+ TupleTableSlot *localslot;
+ TupleTableSlot *remoteslot;
+ bool found;
+ MemoryContext oldctx;
+
+ ensure_transaction();
+
+ relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
+ &newtup);
+ rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+ /* Check if we can do the update. */
+ check_relation_updatable(rel);
+
+ /* Initialize the executor state. */
+ estate = create_estate_for_relation(rel);
+ remoteslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+ localslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(localslot, RelationGetDescr(rel->localrel));
+ EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
+
+ PushActiveSnapshot(GetTransactionSnapshot());
+ ExecOpenIndices(estate->es_result_relation_info, false);
+
+ /* Build the search tuple. */
+ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ slot_store_cstrings(remoteslot, rel,
+ has_oldtup ? oldtup.values : newtup.values);
+ MemoryContextSwitchTo(oldctx);
+
+ /*
+ * Try to find the tuple using either the replica identity index, the
+ * primary key, or, if needed, a sequential scan.
+ */
+ idxoid = GetRelationIdentityOrPK(rel->localrel);
+ Assert(OidIsValid(idxoid) ||
+ (rel->remoterel.replident == REPLICA_IDENTITY_FULL && has_oldtup));
+
+ if (OidIsValid(idxoid))
+ found = RelationFindReplTupleByIndex(rel->localrel, idxoid,
+ LockTupleExclusive,
+ remoteslot, localslot);
+ else
+ found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive,
+ remoteslot, localslot);
+
+ ExecClearTuple(remoteslot);
+
+ /*
+ * Tuple found.
+ *
+ * Note this will fail if there are other conflicting unique indexes.
+ */
+ if (found)
+ {
+ /* Process and store remote tuple in the slot */
+ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ ExecStoreTuple(localslot->tts_tuple, remoteslot, InvalidBuffer, false);
+ slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
+ MemoryContextSwitchTo(oldctx);
+
+ EvalPlanQualSetSlot(&epqstate, remoteslot);
+
+ /* Do the actual update. */
+ ExecSimpleRelationUpdate(estate, &epqstate, localslot, remoteslot);
+ }
+ else
+ {
+ /*
+ * The tuple to be updated could not be found.
+ *
+ * TODO: what to do here? Perhaps change the log level to LOG?
+ */
+ elog(DEBUG1,
+ "logical replication did not find row for update "
+ "in replication target relation \"%s\"",
+ RelationGetRelationName(rel->localrel));
+ }
+
+ /* Cleanup. */
+ ExecCloseIndices(estate->es_result_relation_info);
+ PopActiveSnapshot();
+ EvalPlanQualEnd(&epqstate);
+ ExecResetTupleTable(estate->es_tupleTable, false);
+ FreeExecutorState(estate);
+
+ logicalrep_rel_close(rel, NoLock);
+
+ CommandCounterIncrement();
+}
+
+/*
+ * Handle DELETE message.
+ *
+ * TODO: FDW support
+ */
+static void
+apply_handle_delete(StringInfo s)
+{
+ LogicalRepRelMapEntry *rel;
+ LogicalRepTupleData oldtup;
+ LogicalRepRelId relid;
+ Oid idxoid;
+ EState *estate;
+ EPQState epqstate;
+ TupleTableSlot *remoteslot;
+ TupleTableSlot *localslot;
+ bool found;
+ MemoryContext oldctx;
+
+ ensure_transaction();
+
+ relid = logicalrep_read_delete(s, &oldtup);
+ rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+ /* Check if we can do the delete. */
+ check_relation_updatable(rel);
+
+ /* Initialize the executor state. */
+ estate = create_estate_for_relation(rel);
+ remoteslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+ localslot = ExecInitExtraTupleSlot(estate);
+ ExecSetSlotDescriptor(localslot, RelationGetDescr(rel->localrel));
+ EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
+
+ PushActiveSnapshot(GetTransactionSnapshot());
+ ExecOpenIndices(estate->es_result_relation_info, false);
+
+ /* Build the search tuple. */
+ oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+ slot_store_cstrings(remoteslot, rel, oldtup.values);
+ MemoryContextSwitchTo(oldctx);
+
+ /*
+ * Try to find the tuple using either the replica identity index, the
+ * primary key, or, if needed, a sequential scan.
+ */
+ idxoid = GetRelationIdentityOrPK(rel->localrel);
+ Assert(OidIsValid(idxoid) ||
+ (rel->remoterel.replident == REPLICA_IDENTITY_FULL));
+
+ if (OidIsValid(idxoid))
+ found = RelationFindReplTupleByIndex(rel->localrel, idxoid,
+ LockTupleExclusive,
+ remoteslot, localslot);
+ else
+ found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive,
+ remoteslot, localslot);
+ /* If found, delete it. */
+ if (found)
+ {
+ EvalPlanQualSetSlot(&epqstate, localslot);
+
+ /* Do the actual delete. */
+ ExecSimpleRelationDelete(estate, &epqstate, localslot);
+ }
+ else
+ {
+ /* The tuple to be deleted could not be found. */
+ ereport(DEBUG1,
+ (errmsg("logical replication did not find row for delete "
+ "in replication target relation \"%s\"",
+ RelationGetRelationName(rel->localrel))));
+ }
+
+ /* Cleanup. */
+ ExecCloseIndices(estate->es_result_relation_info);
+ PopActiveSnapshot();
+ EvalPlanQualEnd(&epqstate);
+ ExecResetTupleTable(estate->es_tupleTable, false);
+ FreeExecutorState(estate);
+
+ logicalrep_rel_close(rel, NoLock);
+
+ CommandCounterIncrement();
+}
+
+
+/*
+ * Logical replication protocol message dispatcher.
+ */
+static void
+apply_dispatch(StringInfo s)
+{
+ char action = pq_getmsgbyte(s);
+
+ switch (action)
+ {
+ /* BEGIN */
+ case 'B':
+ apply_handle_begin(s);
+ break;
+ /* COMMIT */
+ case 'C':
+ apply_handle_commit(s);
+ break;
+ /* INSERT */
+ case 'I':
+ apply_handle_insert(s);
+ break;
+ /* UPDATE */
+ case 'U':
+ apply_handle_update(s);
+ break;
+ /* DELETE */
+ case 'D':
+ apply_handle_delete(s);
+ break;
+ /* RELATION */
+ case 'R':
+ apply_handle_relation(s);
+ break;
+ /* TYPE */
+ case 'Y':
+ apply_handle_type(s);
+ break;
+ /* ORIGIN */
+ case 'O':
+ apply_handle_origin(s);
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_PROTOCOL_VIOLATION),
+ errmsg("invalid logical replication message type %c", action)));
+ }
+}
+
+/*
+ * Figure out which write/flush positions to report to the walsender process.
+ *
+ * We can't simply report back the last LSN the walsender sent us because the
+ * local transaction might not yet be flushed to disk locally. Instead we
+ * build a list that associates local with remote LSNs for every commit. When
+ * reporting back the flush position to the sender we iterate that list and
+ * check which entries on it are already locally flushed. Those we can report
+ * as having been flushed.
+ *
+ * *have_pending_txes is set to true if there are outstanding transactions
+ * that still need to be flushed locally.
+ */
+static void
+get_flush_position(XLogRecPtr *write, XLogRecPtr *flush,
+ bool *have_pending_txes)
+{
+ dlist_mutable_iter iter;
+ XLogRecPtr local_flush = GetFlushRecPtr();
+
+ *write = InvalidXLogRecPtr;
+ *flush = InvalidXLogRecPtr;
+
+ dlist_foreach_modify(iter, &lsn_mapping)
+ {
+ FlushPosition *pos =
+ dlist_container(FlushPosition, node, iter.cur);
+
+ *write = pos->remote_end;
+
+ if (pos->local_end <= local_flush)
+ {
+ *flush = pos->remote_end;
+ dlist_delete(iter.cur);
+ pfree(pos);
+ }
+ else
+ {
+ /*
+ * Don't want to uselessly iterate over the rest of the list, which
+ * could potentially be long. Instead get the last element and
+ * grab the write position from there.
+ */
+ pos = dlist_tail_element(FlushPosition, node,
+ &lsn_mapping);
+ *write = pos->remote_end;
+ *have_pending_txes = true;
+ return;
+ }
+ }
+
+ *have_pending_txes = !dlist_is_empty(&lsn_mapping);
+}
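+
+/*
+ * A worked example of the algorithm above, with hypothetical LSNs: if
+ * lsn_mapping holds (local 0/10, remote 1/10) and (local 0/20, remote
+ * 1/20), and GetFlushRecPtr() returned 0/15, the first entry is locally
+ * flushed and is removed, making *flush 1/10; the second is not, so
+ * *write is taken from the tail (1/20) and *have_pending_txes is set.
+ */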
+
+/*
+ * Store current remote/local lsn pair in the tracking list.
+ */
+static void
+store_flush_position(XLogRecPtr remote_lsn)
+{
+ FlushPosition *flushpos;
+
+ /* Need to do this in permanent context */
+ MemoryContextSwitchTo(ApplyCacheContext);
+
+ /* Track commit lsn */
+ flushpos = (FlushPosition *) palloc(sizeof(FlushPosition));
+ flushpos->local_end = XactLastCommitEnd;
+ flushpos->remote_end = remote_lsn;
+
+ dlist_push_tail(&lsn_mapping, &flushpos->node);
+ MemoryContextSwitchTo(ApplyContext);
+}
+
+
+/* Update statistics of the worker. */
+static void
+UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
+{
+ MyLogicalRepWorker->last_lsn = last_lsn;
+ MyLogicalRepWorker->last_send_time = send_time;
+ MyLogicalRepWorker->last_recv_time = GetCurrentTimestamp();
+ if (reply)
+ {
+ MyLogicalRepWorker->reply_lsn = last_lsn;
+ MyLogicalRepWorker->reply_time = send_time;
+ }
+}
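+
+/*
+ * For orientation, a sketch of the two copy-stream messages consumed by
+ * ApplyLoop() below, with field widths as implied by the pq_getmsg* calls:
+ *
+ * 'w' (WAL data): int64 start_lsn, int64 end_lsn, int64 send_time,
+ * followed by a logical replication message passed to apply_dispatch().
+ * 'k' (keepalive): int64 end-of-WAL position, int64 timestamp,
+ * byte reply_requested.
+ */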
+
+/*
+ * Apply main loop.
+ */
+static void
+ApplyLoop(void)
+{
+ XLogRecPtr last_received = InvalidXLogRecPtr;
+
+ /* Init the ApplyContext which we use for easier cleanup. */
+ ApplyContext = AllocSetContextCreate(TopMemoryContext,
+ "ApplyContext",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ /* mark as idle, before starting to loop */
+ pgstat_report_activity(STATE_IDLE, NULL);
+
+ while (!got_SIGTERM)
+ {
+ pgsocket fd = PGINVALID_SOCKET;
+ int rc;
+ int len;
+ char *buf = NULL;
+ bool endofstream = false;
+ TimestampTz last_recv_timestamp = GetCurrentTimestamp();
+ bool ping_sent = false;
+
+ MemoryContextSwitchTo(ApplyContext);
+
+ len = walrcv_receive(wrconn, &buf, &fd);
+
+ if (len != 0)
+ {
+ /* Process the data */
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ if (len == 0)
+ {
+ break;
+ }
+ else if (len < 0)
+ {
+ ereport(LOG,
+ (errmsg("data stream from publisher has ended")));
+ endofstream = true;
+ break;
+ }
+ else
+ {
+ int c;
+ StringInfoData s;
+
+ /* Reset timeout. */
+ last_recv_timestamp = GetCurrentTimestamp();
+ ping_sent = false;
+
+ /* Ensure we are reading the data into our memory context. */
+ MemoryContextSwitchTo(ApplyContext);
+
+ s.data = buf;
+ s.len = len;
+ s.cursor = 0;
+ s.maxlen = -1;
+
+ c = pq_getmsgbyte(&s);
+
+ if (c == 'w')
+ {
+ XLogRecPtr start_lsn;
+ XLogRecPtr end_lsn;
+ TimestampTz send_time;
+
+ start_lsn = pq_getmsgint64(&s);
+ end_lsn = pq_getmsgint64(&s);
+ send_time =
+ IntegerTimestampToTimestampTz(pq_getmsgint64(&s));
+
+ if (last_received < start_lsn)
+ last_received = start_lsn;
+
+ if (last_received < end_lsn)
+ last_received = end_lsn;
+
+ UpdateWorkerStats(last_received, send_time, false);
+
+ apply_dispatch(&s);
+ }
+ else if (c == 'k')
+ {
+ XLogRecPtr endpos;
+ TimestampTz timestamp;
+ bool reply_requested;
+
+ endpos = pq_getmsgint64(&s);
+ timestamp =
+ IntegerTimestampToTimestampTz(pq_getmsgint64(&s));
+ reply_requested = pq_getmsgbyte(&s);
+
+ send_feedback(endpos, reply_requested, false);
+ UpdateWorkerStats(last_received, timestamp, true);
+ }
+ /* other message types are purposefully ignored */
+ }
+
+ len = walrcv_receive(wrconn, &buf, &fd);
+ }
+ }
+
+ if (!in_remote_transaction)
+ {
+ /*
+ * If we didn't get any transactions for a while, there might be
+ * unconsumed invalidation messages in the queue; consume them now.
+ */
+ StartTransactionCommand();
+ /* Check for subscription change */
+ if (!MySubscriptionValid)
+ reread_subscription();
+ CommitTransactionCommand();
+ }
+
+ /* confirm all writes at once */
+ send_feedback(last_received, false, false);
+
+ /* Cleanup the memory. */
+ MemoryContextResetAndDeleteChildren(ApplyContext);
+ MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Check if we need to exit the streaming loop. */
+ if (endofstream)
+ break;
+
+ /*
+ * Wait for more data or latch.
+ */
+ rc = WaitLatchOrSocket(&MyProc->procLatch,
+ WL_SOCKET_READABLE | WL_LATCH_SET |
+ WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ fd, NAPTIME_PER_CYCLE,
+ WAIT_EVENT_LOGICAL_APPLY_MAIN);
+
+ /* Emergency bailout if postmaster has died */
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ if (rc & WL_TIMEOUT)
+ {
+ /*
+ * We didn't receive anything new. If we haven't heard
+ * anything from the server for more than
+ * wal_receiver_timeout / 2, ping the server. Also, if
+ * it's been longer than wal_receiver_status_interval
+ * since the last update we sent, send a status update to
+ * the master anyway, to report any progress in applying
+ * WAL.
+ */
+ bool requestReply = false;
+
+ /*
+ * Check if time since last receive from standby has
+ * reached the configured limit.
+ */
+ if (wal_receiver_timeout > 0)
+ {
+ TimestampTz now = GetCurrentTimestamp();
+ TimestampTz timeout;
+
+ timeout =
+ TimestampTzPlusMilliseconds(last_recv_timestamp,
+ wal_receiver_timeout);
+
+ if (now >= timeout)
+ ereport(ERROR,
+ (errmsg("terminating logical replication worker due to timeout")));
+
+ /*
+ * We didn't receive anything new for half of the
+ * receiver replication timeout. Ping the server.
+ */
+ if (!ping_sent)
+ {
+ timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
+ (wal_receiver_timeout / 2));
+ if (now >= timeout)
+ {
+ requestReply = true;
+ ping_sent = true;
+ }
+ }
+ }
+
+ send_feedback(last_received, requestReply, requestReply);
+ }
+
+ ResetLatch(&MyProc->procLatch);
+ }
+}
+
+/*
+ * Send a Standby Status Update message to the server.
+ *
+ * 'recvpos' is the latest LSN we've received data to; 'force' is set if
+ * we need to send a response to avoid timeouts.
+ */
+static void
+send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
+{
+ static StringInfo reply_message = NULL;
+ static TimestampTz send_time = 0;
+
+ static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
+ static XLogRecPtr last_writepos = InvalidXLogRecPtr;
+ static XLogRecPtr last_flushpos = InvalidXLogRecPtr;
+
+ XLogRecPtr writepos;
+ XLogRecPtr flushpos;
+ TimestampTz now;
+ bool have_pending_txes;
+
+ /*
+ * If the user doesn't want status to be reported to the publisher, be
+ * sure to exit before doing anything at all.
+ */
+ if (!force && wal_receiver_status_interval <= 0)
+ return;
+
+ /* It's legal to not pass a recvpos */
+ if (recvpos < last_recvpos)
+ recvpos = last_recvpos;
+
+ get_flush_position(&writepos, &flushpos, &have_pending_txes);
+
+ /*
+ * No outstanding transactions to flush, so we can report the latest
+ * received position. This is important for synchronous replication.
+ */
+ if (!have_pending_txes)
+ flushpos = writepos = recvpos;
+
+ if (writepos < last_writepos)
+ writepos = last_writepos;
+
+ if (flushpos < last_flushpos)
+ flushpos = last_flushpos;
+
+ now = GetCurrentTimestamp();
+
+ /* If we've already reported everything, we're good. */
+ if (!force &&
+ writepos == last_writepos &&
+ flushpos == last_flushpos &&
+ !TimestampDifferenceExceeds(send_time, now,
+ wal_receiver_status_interval * 1000))
+ return;
+ send_time = now;
+
+ if (!reply_message)
+ {
+ MemoryContext oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+ reply_message = makeStringInfo();
+ MemoryContextSwitchTo(oldctx);
+ }
+ else
+ resetStringInfo(reply_message);
+
+ pq_sendbyte(reply_message, 'r');
+ pq_sendint64(reply_message, recvpos); /* write */
+ pq_sendint64(reply_message, flushpos); /* flush */
+ pq_sendint64(reply_message, writepos); /* apply */
+ pq_sendint64(reply_message, now); /* sendTime */
+ pq_sendbyte(reply_message, requestReply); /* replyRequested */
+
+ elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
+ force,
+ (uint32) (recvpos >> 32), (uint32) recvpos,
+ (uint32) (writepos >> 32), (uint32) writepos,
+ (uint32) (flushpos >> 32), (uint32) flushpos
+ );
+
+ walrcv_send(wrconn, reply_message->data, reply_message->len);
+
+ if (recvpos > last_recvpos)
+ last_recvpos = recvpos;
+ if (writepos > last_writepos)
+ last_writepos = writepos;
+ if (flushpos > last_flushpos)
+ last_flushpos = flushpos;
+}
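+
+/*
+ * For reference, the reply assembled above has the layout, per the
+ * pq_send* calls: byte 'r', int64 write position, int64 flush position,
+ * int64 apply position, int64 send timestamp, byte replyRequested.
+ */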
+
+
+/*
+ * Reread subscription info and exit on change.
+ */
+static void
+reread_subscription(void)
+{
+ MemoryContext oldctx;
+ Subscription *newsub;
+
+ /* Ensure allocations in permanent context. */
+ oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+
+ newsub = GetSubscription(MyLogicalRepWorker->subid, true);
+
+ /*
+ * Exit if the subscription was removed. This normally should not
+ * happen as the worker gets killed during DROP SUBSCRIPTION. Note
+ * that we must check this before dereferencing newsub below.
+ */
+ if (!newsub)
+ {
+ ereport(LOG,
+ (errmsg("logical replication worker for subscription \"%s\" will "
+ "stop because the subscription was removed",
+ MySubscription->name)));
+
+ walrcv_disconnect(wrconn);
+ proc_exit(0);
+ }
+
+ /*
+ * Exit if the connection string was changed. The launcher will start
+ * a new worker.
+ */
+ if (strcmp(newsub->conninfo, MySubscription->conninfo) != 0)
+ {
+ ereport(LOG,
+ (errmsg("logical replication worker for subscription \"%s\" will "
+ "restart because the connection information was changed",
+ MySubscription->name)));
+
+ walrcv_disconnect(wrconn);
+ proc_exit(0);
+ }
+
+ /*
+ * Exit if the publication list was changed. The launcher will start
+ * a new worker.
+ */
+ if (!equal(newsub->publications, MySubscription->publications))
+ {
+ ereport(LOG,
+ (errmsg("logical replication worker for subscription \"%s\" will "
+ "restart because the subscription's publications were changed",
+ MySubscription->name)));
+
+ walrcv_disconnect(wrconn);
+ proc_exit(0);
+ }
+
+ /*
+ * Exit if the subscription was disabled.
+ * This normally should not happen as the worker gets killed
+ * during ALTER SUBSCRIPTION ... DISABLE.
+ */
+ if (!newsub->enabled)
+ {
+ ereport(LOG,
+ (errmsg("logical replication worker for subscription \"%s\" will "
+ "stop because the subscription was disabled",
+ MySubscription->name)));
+
+ walrcv_disconnect(wrconn);
+ proc_exit(0);
+ }
+
+ /* Also check for other changes that should never happen. */
+ if (newsub->dbid != MySubscription->dbid ||
+ strcmp(newsub->name, MySubscription->name) != 0 ||
+ strcmp(newsub->slotname, MySubscription->slotname) != 0)
+ {
+ elog(ERROR, "subscription %u changed unexpectedly",
+ MyLogicalRepWorker->subid);
+ }
+
+ /* Clean old subscription info and switch to new one. */
+ FreeSubscription(MySubscription);
+ MySubscription = newsub;
+
+ MemoryContextSwitchTo(oldctx);
+
+ MySubscriptionValid = true;
+}
+
+/*
+ * Callback from subscription syscache invalidation.
+ */
+static void
+subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+ MySubscriptionValid = false;
+}
+
+
+/* Logical Replication Apply worker entry point */
+void
+ApplyWorkerMain(Datum main_arg)
+{
+ int worker_slot = DatumGetObjectId(main_arg);
+ MemoryContext oldctx;
+ char originname[NAMEDATALEN];
+ RepOriginId originid;
+ XLogRecPtr origin_startpos;
+ char *err;
+ int server_version;
+ TimeLineID startpointTLI;
+ WalRcvStreamOptions options;
+
+ /* Attach to slot */
+ logicalrep_worker_attach(worker_slot);
+
+ /* Setup signal handling */
+ pqsignal(SIGTERM, logicalrep_worker_sigterm);
+ BackgroundWorkerUnblockSignals();
+
+ /* Initialise stats to a reasonably sane value */
+ MyLogicalRepWorker->last_send_time = MyLogicalRepWorker->last_recv_time =
+ MyLogicalRepWorker->reply_time = GetCurrentTimestamp();
+
+ /* Make it easy to identify our processes. */
+ SetConfigOption("application_name", MyBgworkerEntry->bgw_name,
+ PGC_USERSET, PGC_S_SESSION);
+
+ /* Load the libpq-specific functions */
+ load_file("libpqwalreceiver", false);
+
+ Assert(CurrentResourceOwner == NULL);
+ CurrentResourceOwner = ResourceOwnerCreate(NULL,
+ "logical replication apply");
+
+ /* Run as replica session replication role. */
+ SetConfigOption("session_replication_role", "replica",
+ PGC_SUSET, PGC_S_OVERRIDE);
+
+ /* Connect to our database. */
+ BackgroundWorkerInitializeConnectionByOid(MyLogicalRepWorker->dbid,
+ MyLogicalRepWorker->userid);
+
+ /* Load the subscription into persistent memory context. */
+ CreateCacheMemoryContext();
+ ApplyCacheContext = AllocSetContextCreate(CacheMemoryContext,
+ "ApplyCacheContext",
+ ALLOCSET_DEFAULT_SIZES);
+ StartTransactionCommand();
+ oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+ MySubscription = GetSubscription(MyLogicalRepWorker->subid, false);
+ MySubscriptionValid = true;
+ MemoryContextSwitchTo(oldctx);
+
+ if (!MySubscription->enabled)
+ {
+ ereport(LOG,
+ (errmsg("logical replication worker for subscription \"%s\" will not "
+ "start because the subscription was disabled during startup",
+ MySubscription->name)));
+
+ proc_exit(0);
+ }
+
+ /* Keep us informed about subscription changes. */
+ CacheRegisterSyscacheCallback(SUBSCRIPTIONOID,
+ subscription_change_cb,
+ (Datum) 0);
+
+ ereport(LOG,
+ (errmsg("logical replication apply worker for subscription \"%s\" has started",
+ MySubscription->name)));
+
+ /* Setup replication origin tracking. */
+ snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
+ originid = replorigin_by_name(originname, true);
+ if (!OidIsValid(originid))
+ originid = replorigin_create(originname);
+ replorigin_session_setup(originid);
+ replorigin_session_origin = originid;
+ origin_startpos = replorigin_session_get_progress(false);
+
+ CommitTransactionCommand();
+
+ /* Connect to the origin and start the replication. */
+ elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
+ MySubscription->conninfo);
+ wrconn = walrcv_connect(MySubscription->conninfo, true,
+ MySubscription->name, &err);
+ if (wrconn == NULL)
+ ereport(ERROR,
+ (errmsg("could not connect to the publisher: %s", err)));
+
+ /*
+ * We don't really use the output of identify_system for anything, but
+ * it performs some initialization on the upstream, so we still call
+ * it.
+ */
+ (void) walrcv_identify_system(wrconn, &startpointTLI, &server_version);
+
+ /* Build logical replication streaming options. */
+ options.logical = true;
+ options.startpoint = origin_startpos;
+ options.slotname = MySubscription->slotname;
+ options.proto.logical.proto_version = LOGICALREP_PROTO_VERSION_NUM;
+ options.proto.logical.publication_names = MySubscription->publications;
+
+ /* Start streaming from the slot. */
+ walrcv_startstreaming(wrconn, &options);
+
+ /* Run the main loop. */
+ ApplyLoop();
+
+ walrcv_disconnect(wrconn);
+
+ /* We should only get here if we received SIGTERM */
+ proc_exit(0);
+}
diff --git a/src/backend/replication/pgoutput/Makefile b/src/backend/replication/pgoutput/Makefile
new file mode 100644
index 0000000000..1b68e2b4b6
--- /dev/null
+++ b/src/backend/replication/pgoutput/Makefile
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for src/backend/replication/pgoutput
+#
+# IDENTIFICATION
+# src/backend/replication/pgoutput/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/replication/pgoutput
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
+
+OBJS = pgoutput.o $(WIN32RES)
+PGFILEDESC = "pgoutput - standard logical replication output plugin"
+NAME = pgoutput
+
+all: all-shared-lib
+
+include $(top_srcdir)/src/Makefile.shlib
+
+install: all installdirs install-lib
+
+installdirs: installdirs-lib
+
+uninstall: uninstall-lib
+
+clean distclean maintainer-clean: clean-lib
+ rm -f $(OBJS)
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
new file mode 100644
index 0000000000..04dde5d494
--- /dev/null
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -0,0 +1,596 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgoutput.c
+ * Logical Replication output plugin
+ *
+ * Copyright (c) 2012-2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/replication/pgoutput/pgoutput.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_publication.h"
+
+#include "replication/logical.h"
+#include "replication/logicalproto.h"
+#include "replication/origin.h"
+#include "replication/pgoutput.h"
+
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/int8.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+PG_MODULE_MAGIC;
+
+extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
+
+static void pgoutput_startup(LogicalDecodingContext *ctx,
+ OutputPluginOptions *opt, bool is_init);
+static void pgoutput_shutdown(LogicalDecodingContext *ctx);
+static void pgoutput_begin_txn(LogicalDecodingContext *ctx,
+ ReorderBufferTXN *txn);
+static void pgoutput_commit_txn(LogicalDecodingContext *ctx,
+ ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
+static void pgoutput_change(LogicalDecodingContext *ctx,
+ ReorderBufferTXN *txn, Relation rel,
+ ReorderBufferChange *change);
+static bool pgoutput_origin_filter(LogicalDecodingContext *ctx,
+ RepOriginId origin_id);
+
+static bool publications_valid;
+
+static List *LoadPublications(List *pubnames);
+static void publication_invalidation_cb(Datum arg, int cacheid,
+ uint32 hashvalue);
+
+/* Entry in the map used to remember which relation schemas we sent. */
+typedef struct RelationSyncEntry
+{
+ Oid relid; /* relation oid */
+ bool schema_sent; /* did we send the schema? */
+ bool replicate_valid;
+ PublicationActions pubactions;
+} RelationSyncEntry;
+
+/* Map used to remember which relation schemas we sent. */
+static HTAB *RelationSyncCache = NULL;
+
+static void init_rel_sync_cache(MemoryContext decoding_context);
+static RelationSyncEntry *get_rel_sync_entry(PGOutputData *data, Oid relid);
+static void rel_sync_cache_relation_cb(Datum arg, Oid relid);
+static void rel_sync_cache_publication_cb(Datum arg, int cacheid,
+ uint32 hashvalue);
+
+/*
+ * Specify output plugin callbacks
+ */
+void
+_PG_output_plugin_init(OutputPluginCallbacks *cb)
+{
+ AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
+
+ cb->startup_cb = pgoutput_startup;
+ cb->begin_cb = pgoutput_begin_txn;
+ cb->change_cb = pgoutput_change;
+ cb->commit_cb = pgoutput_commit_txn;
+ cb->filter_by_origin_cb = pgoutput_origin_filter;
+ cb->shutdown_cb = pgoutput_shutdown;
+}
+
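+/*
+ * Parse the output plugin options supplied by the subscriber.
+ *
+ * As an example (with hypothetical slot and publication names), a client
+ * would typically supply these via something like:
+ *
+ * START_REPLICATION SLOT "sub1" LOGICAL 0/0
+ * (proto_version '1', publication_names '"pub1","pub2"')
+ */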
+static void
+parse_output_parameters(List *options, uint32 *protocol_version,
+ List **publication_names)
+{
+ ListCell *lc;
+ bool protocol_version_given = false;
+ bool publication_names_given = false;
+
+ foreach(lc, options)
+ {
+ DefElem *defel = (DefElem *) lfirst(lc);
+
+ Assert(defel->arg == NULL || IsA(defel->arg, String));
+
+ /* Check each param, whether or not we recognise it */
+ if (strcmp(defel->defname, "proto_version") == 0)
+ {
+ int64 parsed;
+
+ if (protocol_version_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ protocol_version_given = true;
+
+ if (!scanint8(strVal(defel->arg), true, &parsed))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid proto_version")));
+
+ if (parsed > PG_UINT32_MAX || parsed < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("proto_version \"%s\" out of range",
+ strVal(defel->arg))));
+
+ *protocol_version = (uint32) parsed;
+ }
+ else if (strcmp(defel->defname, "publication_names") == 0)
+ {
+ if (publication_names_given)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ publication_names_given = true;
+
+ if (!SplitIdentifierString(strVal(defel->arg), ',',
+ publication_names))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid publication_names syntax")));
+ }
+ else
+ elog(ERROR, "unrecognized pgoutput option: %s", defel->defname);
+ }
+}
+
+/*
+ * Initialize this plugin
+ */
+static void
+pgoutput_startup(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
+ bool is_init)
+{
+ PGOutputData *data = palloc0(sizeof(PGOutputData));
+
+ /* Create our memory context for private allocations. */
+ data->context = AllocSetContextCreate(ctx->context,
+ "logical replication output context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ ctx->output_plugin_private = data;
+
+ /* This plugin uses binary protocol. */
+ opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
+
+ /*
+ * This is replication start and not slot initialization.
+ *
+ * Parse and validate options passed by the client.
+ */
+ if (!is_init)
+ {
+ /* Parse the params and ERROR if we see any we don't recognise */
+ parse_output_parameters(ctx->output_plugin_options,
+ &data->protocol_version,
+ &data->publication_names);
+
+ /* Check if we support the requested protocol */
+ if (data->protocol_version != LOGICALREP_PROTO_VERSION_NUM)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("client sent proto_version=%d but we only support protocol %d or lower",
+ data->protocol_version, LOGICALREP_PROTO_VERSION_NUM)));
+
+ if (data->protocol_version < LOGICALREP_PROTO_MIN_VERSION_NUM)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("client sent proto_version=%d but we only support protocol %d or higher",
+ data->protocol_version, LOGICALREP_PROTO_MIN_VERSION_NUM)));
+
+ if (list_length(data->publication_names) < 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("publication_names parameter missing")));
+
+ /* Init publication state. */
+ data->publications = NIL;
+ publications_valid = false;
+ CacheRegisterSyscacheCallback(PUBLICATIONOID,
+ publication_invalidation_cb,
+ (Datum) 0);
+
+ /* Initialize relation schema cache. */
+ init_rel_sync_cache(CacheMemoryContext);
+ }
+}
+
+/*
+ * BEGIN callback
+ */
+static void
+pgoutput_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
+{
+ bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
+
+ OutputPluginPrepareWrite(ctx, !send_replication_origin);
+ logicalrep_write_begin(ctx->out, txn);
+
+ if (send_replication_origin)
+ {
+ char *origin;
+
+ /* Message boundary */
+ OutputPluginWrite(ctx, false);
+ OutputPluginPrepareWrite(ctx, true);
+
+ /*
+ * XXX: which behaviour do we want here?
+ *
+ * Alternatives:
+ * - don't send origin message if origin name not found
+ * (that's what we do now)
+ * - throw error - that will break replication, not good
+ * - send some special "unknown" origin
+ */
+ if (replorigin_by_oid(txn->origin_id, true, &origin))
+ logicalrep_write_origin(ctx->out, origin, txn->origin_lsn);
+ }
+
+ OutputPluginWrite(ctx, true);
+}
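+
+/*
+ * A note on the prepare/write pattern above: the boolean passed to
+ * OutputPluginPrepareWrite() and OutputPluginWrite() says whether this is
+ * the last write for the callback, so a callback emitting two messages
+ * (origin followed by BEGIN) passes false for the first pair and true
+ * for the final one.
+ */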
+
+/*
+ * COMMIT callback
+ */
+static void
+pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn)
+{
+ OutputPluginPrepareWrite(ctx, true);
+ logicalrep_write_commit(ctx->out, txn, commit_lsn);
+ OutputPluginWrite(ctx, true);
+}
+
+/*
+ * Sends the decoded DML over the wire.
+ */
+static void
+pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+ Relation relation, ReorderBufferChange *change)
+{
+ PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
+ MemoryContext old;
+ RelationSyncEntry *relentry;
+
+ relentry = get_rel_sync_entry(data, RelationGetRelid(relation));
+
+ /* First check the table filter */
+ switch (change->action)
+ {
+ case REORDER_BUFFER_CHANGE_INSERT:
+ if (!relentry->pubactions.pubinsert)
+ return;
+ break;
+ case REORDER_BUFFER_CHANGE_UPDATE:
+ if (!relentry->pubactions.pubupdate)
+ return;
+ break;
+ case REORDER_BUFFER_CHANGE_DELETE:
+ if (!relentry->pubactions.pubdelete)
+ return;
+ break;
+ default:
+ Assert(false);
+ }
+
+ /* Avoid leaking memory by using and resetting our own context */
+ old = MemoryContextSwitchTo(data->context);
+
+ /*
+ * Write the relation schema if the current schema hasn't been sent yet.
+ */
+ if (!relentry->schema_sent)
+ {
+ TupleDesc desc;
+ int i;
+
+ desc = RelationGetDescr(relation);
+
+ /*
+ * Write out type info if needed. We do that only for user-created
+ * types.
+ */
+ for (i = 0; i < desc->natts; i++)
+ {
+ Form_pg_attribute att = desc->attrs[i];
+
+ if (att->attisdropped)
+ continue;
+
+ if (att->atttypid < FirstNormalObjectId)
+ continue;
+
+ OutputPluginPrepareWrite(ctx, false);
+ logicalrep_write_typ(ctx->out, att->atttypid);
+ OutputPluginWrite(ctx, false);
+ }
+
+ OutputPluginPrepareWrite(ctx, false);
+ logicalrep_write_rel(ctx->out, relation);
+ OutputPluginWrite(ctx, false);
+ relentry->schema_sent = true;
+ }
+
+ /* Send the data */
+ switch (change->action)
+ {
+ case REORDER_BUFFER_CHANGE_INSERT:
+ OutputPluginPrepareWrite(ctx, true);
+ logicalrep_write_insert(ctx->out, relation,
+ &change->data.tp.newtuple->tuple);
+ OutputPluginWrite(ctx, true);
+ break;
+ case REORDER_BUFFER_CHANGE_UPDATE:
+ {
+ HeapTuple oldtuple = change->data.tp.oldtuple ?
+ &change->data.tp.oldtuple->tuple : NULL;
+
+ OutputPluginPrepareWrite(ctx, true);
+ logicalrep_write_update(ctx->out, relation, oldtuple,
+ &change->data.tp.newtuple->tuple);
+ OutputPluginWrite(ctx, true);
+ break;
+ }
+ case REORDER_BUFFER_CHANGE_DELETE:
+ if (change->data.tp.oldtuple)
+ {
+ OutputPluginPrepareWrite(ctx, true);
+ logicalrep_write_delete(ctx->out, relation,
+ &change->data.tp.oldtuple->tuple);
+ OutputPluginWrite(ctx, true);
+ }
+ else
+ elog(DEBUG1, "didn't send DELETE change because of missing oldtuple");
+ break;
+ default:
+ Assert(false);
+ }
+
+ /* Cleanup */
+ MemoryContextSwitchTo(old);
+ MemoryContextReset(data->context);
+}
+
+/*
+ * Currently we always forward. Returning false here means changes are
+ * never filtered out based on their replication origin.
+ */
+static bool
+pgoutput_origin_filter(LogicalDecodingContext *ctx,
+ RepOriginId origin_id)
+{
+ return false;
+}
+
+/*
+ * Shutdown the output plugin.
+ *
+ * Note, we don't need to clean data->context as it's a child context of
+ * ctx->context, so it will be cleaned up by the logical decoding
+ * machinery.
+ */
+static void
+pgoutput_shutdown(LogicalDecodingContext *ctx)
+{
+ if (RelationSyncCache)
+ {
+ hash_destroy(RelationSyncCache);
+ RelationSyncCache = NULL;
+ }
+}
+
+/*
+ * Load publications from the list of publication names.
+ */
+static List *
+LoadPublications(List *pubnames)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach (lc, pubnames)
+ {
+ char *pubname = (char *) lfirst(lc);
+ Publication *pub = GetPublicationByName(pubname, false);
+
+ result = lappend(result, pub);
+ }
+
+ return result;
+}
+
+/*
+ * Publication cache invalidation callback.
+ */
+static void
+publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+ publications_valid = false;
+
+ /*
+ * Also invalidate per-relation cache so that next time the filtering
+ * info is checked it will be updated with the new publication
+ * settings.
+ */
+ rel_sync_cache_publication_cb(arg, cacheid, hashvalue);
+}
+
+/*
+ * Initialize the relation schema sync cache for a decoding session.
+ *
+ * The hash table is destroyed at the end of a decoding session. While
+ * relcache invalidations still exist and will still be invoked, they
+ * will just see the null hash table global and take no action.
+ */
+static void
+init_rel_sync_cache(MemoryContext cachectx)
+{
+ HASHCTL ctl;
+ MemoryContext old_ctxt;
+
+ if (RelationSyncCache != NULL)
+ return;
+
+ /* Make a new hash table for the cache */
+ MemSet(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(RelationSyncEntry);
+ ctl.hcxt = cachectx;
+
+ old_ctxt = MemoryContextSwitchTo(cachectx);
+ RelationSyncCache = hash_create("logical replication output relation cache",
+ 128, &ctl,
+ HASH_ELEM | HASH_CONTEXT | HASH_BLOBS);
+ (void) MemoryContextSwitchTo(old_ctxt);
+
+ Assert(RelationSyncCache != NULL);
+
+ CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0);
+ CacheRegisterSyscacheCallback(PUBLICATIONRELMAP,
+ rel_sync_cache_publication_cb,
+ (Datum) 0);
+}
+
+/*
+ * Find or create entry in the relation schema cache.
+ */
+static RelationSyncEntry *
+get_rel_sync_entry(PGOutputData *data, Oid relid)
+{
+ RelationSyncEntry *entry;
+ bool found;
+ MemoryContext oldctx;
+
+ Assert(RelationSyncCache != NULL);
+
+ /* Find cached relation info, creating it if not found */
+ oldctx = MemoryContextSwitchTo(CacheMemoryContext);
+ entry = (RelationSyncEntry *) hash_search(RelationSyncCache,
+ (void *) &relid,
+ HASH_ENTER, &found);
+ MemoryContextSwitchTo(oldctx);
+ Assert(entry != NULL);
+
+ /* Not found means schema wasn't sent */
+ if (!found || !entry->replicate_valid)
+ {
+ List *pubids = GetRelationPublications(relid);
+ ListCell *lc;
+
+ /* Reload publications if needed before use. */
+ if (!publications_valid)
+ {
+ oldctx = MemoryContextSwitchTo(CacheMemoryContext);
+ if (data->publications)
+ list_free_deep(data->publications);
+
+ data->publications = LoadPublications(data->publication_names);
+ MemoryContextSwitchTo(oldctx);
+ publications_valid = true;
+ }
+
+ /*
+ * Build publication cache. We can't use the one provided by relcache,
+ * as relcache considers all publications the given relation is in, but
+ * here we only need to consider the ones the subscriber requested.
+ */
+ entry->pubactions.pubinsert = entry->pubactions.pubupdate =
+ entry->pubactions.pubdelete = false;
+
+ foreach(lc, data->publications)
+ {
+ Publication *pub = lfirst(lc);
+
+ if (pub->alltables || list_member_oid(pubids, pub->oid))
+ {
+ entry->pubactions.pubinsert |= pub->pubactions.pubinsert;
+ entry->pubactions.pubupdate |= pub->pubactions.pubupdate;
+ entry->pubactions.pubdelete |= pub->pubactions.pubdelete;
+ }
+
+ if (entry->pubactions.pubinsert && entry->pubactions.pubupdate &&
+ entry->pubactions.pubdelete)
+ break;
+ }
+
+ list_free(pubids);
+
+ entry->replicate_valid = true;
+ }
+
+ if (!found)
+ entry->schema_sent = false;
+
+ return entry;
+}
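+
+/*
+ * An example of the publication merging above, with hypothetical
+ * publications: if the subscriber requested pub1 publishing only INSERTs
+ * and pub2 publishing UPDATEs and DELETEs for this relation, the ORed
+ * result is an entry that replicates all three actions.
+ */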
+
+/*
+ * Relcache invalidation callback
+ */
+static void
+rel_sync_cache_relation_cb(Datum arg, Oid relid)
+{
+ RelationSyncEntry *entry;
+
+ /*
+ * We can get here if the plugin was used via the SQL interface, as the
+ * RelationSyncCache is destroyed when decoding finishes, but there is
+ * no way to unregister the relcache invalidation callback.
+ */
+ if (RelationSyncCache == NULL)
+ return;
+
+ /*
+ * Nobody keeps pointers to entries in this hash table around outside
+ * logical decoding callback calls - but invalidation events can come in
+ * *during* a callback if we access the relcache in the callback. Because
+ * of that we must mark the cache entry as invalid but not remove it from
+ * the hash while it could still be referenced, then prune it at a later
+ * safe point.
+ *
+ * Getting invalidations for relations that aren't in the table is
+ * entirely normal, since there's no way to unregister for an
+ * invalidation event. So we don't care if it's found or not.
+ */
+ entry = (RelationSyncEntry *) hash_search(RelationSyncCache, &relid,
+ HASH_FIND, NULL);
+
+ /*
+ * Reset schema sent status as the relation definition may have
+ * changed.
+ */
+ if (entry != NULL)
+ entry->schema_sent = false;
+}
+
+/*
+ * Publication relation map syscache invalidation callback
+ */
+static void
+rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+ HASH_SEQ_STATUS status;
+ RelationSyncEntry *entry;
+
+ /*
+ * We can get here if the plugin was used via the SQL interface, as the
+ * RelationSyncCache is destroyed when decoding finishes, but there is
+ * no way to unregister the relcache invalidation callback.
+ */
+ if (RelationSyncCache == NULL)
+ return;
+
+ /*
+ * There is no way to find which entry in our cache the hash value
+ * belongs to, so mark the whole cache as invalid.
+ */
+ hash_seq_init(&status, RelationSyncCache);
+ while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL)
+ entry->replicate_valid = false;
+}
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index c6b54ec36a..0e4a4b9d19 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -196,6 +196,7 @@ WalReceiverMain(void)
WalRcvData *walrcv = WalRcv;
TimestampTz last_recv_timestamp;
bool ping_sent;
+ char *err;
/*
* WalRcv should be set up already (if we are a backend, we inherit this
@@ -293,7 +294,10 @@ WalReceiverMain(void)
/* Establish the connection to the primary for XLOG streaming */
EnableWalRcvImmediateExit();
- wrconn = walrcv_connect(conninfo, false, "walreceiver");
+ wrconn = walrcv_connect(conninfo, false, "walreceiver", &err);
+ if (!wrconn)
+ ereport(ERROR,
+ (errmsg("could not connect to the primary server: %s", err)));
DisableWalRcvImmediateExit();
/*
@@ -316,13 +320,16 @@ WalReceiverMain(void)
{
char *primary_sysid;
char standby_sysid[32];
+ int server_version;
+ WalRcvStreamOptions options;
/*
* Check that we're connected to a valid server using the
- * IDENTIFY_SYSTEM replication command,
+ * IDENTIFY_SYSTEM replication command.
*/
EnableWalRcvImmediateExit();
- primary_sysid = walrcv_identify_system(wrconn, &primaryTLI);
+ primary_sysid = walrcv_identify_system(wrconn, &primaryTLI,
+ &server_version);
snprintf(standby_sysid, sizeof(standby_sysid), UINT64_FORMAT,
GetSystemIdentifier());
@@ -368,9 +375,12 @@ WalReceiverMain(void)
* history file, bump recovery target timeline, and ask us to restart
* on the new timeline.
*/
+ options.logical = false;
+ options.startpoint = startpoint;
+ options.slotname = slotname[0] != '\0' ? slotname : NULL;
+ options.proto.physical.startpointTLI = startpointTLI;
ThisTimeLineID = startpointTLI;
- if (walrcv_startstreaming(wrconn, startpointTLI, startpoint,
- slotname[0] != '\0' ? slotname : NULL))
+ if (walrcv_startstreaming(wrconn, &options))
{
if (first_stream)
ereport(LOG,
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 00f5ae9e92..2d1ed143e0 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -28,6 +28,7 @@
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
+#include "replication/logicallauncher.h"
#include "replication/slot.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
@@ -143,6 +144,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
size = add_size(size, ReplicationOriginShmemSize());
size = add_size(size, WalSndShmemSize());
size = add_size(size, WalRcvShmemSize());
+ size = add_size(size, ApplyLauncherShmemSize());
size = add_size(size, SnapMgrShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
@@ -258,6 +260,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
ReplicationOriginShmemInit();
WalSndShmemInit();
WalRcvShmemInit();
+ ApplyLauncherShmemInit();
/*
* Set up other modules that need some shared memory space
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index dd0418246a..c95ca5b2e1 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -48,3 +48,5 @@ ReplicationOriginLock 40
MultiXactTruncationLock 41
OldSnapshotTimeMapLock 42
BackendRandomLock 43
+LogicalRepLauncherLock 44
+LogicalRepWorkerLock 45
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 1492101336..0306247177 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -44,9 +44,11 @@
#include "commands/portalcmds.h"
#include "commands/prepare.h"
#include "commands/proclang.h"
+#include "commands/publicationcmds.h"
#include "commands/schemacmds.h"
#include "commands/seclabel.h"
#include "commands/sequence.h"
+#include "commands/subscriptioncmds.h"
#include "commands/tablecmds.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
@@ -209,6 +211,11 @@ check_xact_readonly(Node *parsetree)
case T_CreateForeignTableStmt:
case T_ImportForeignSchemaStmt:
case T_SecLabelStmt:
+ case T_CreatePublicationStmt:
+ case T_AlterPublicationStmt:
+ case T_CreateSubscriptionStmt:
+ case T_AlterSubscriptionStmt:
+ case T_DropSubscriptionStmt:
PreventCommandIfReadOnly(CreateCommandTag(parsetree));
PreventCommandIfParallelMode(CreateCommandTag(parsetree));
break;
@@ -1578,6 +1585,33 @@ ProcessUtilitySlow(ParseState *pstate,
address = CreateAccessMethod((CreateAmStmt *) parsetree);
break;
+ case T_CreatePublicationStmt:
+ address = CreatePublication((CreatePublicationStmt *) parsetree);
+ break;
+
+ case T_AlterPublicationStmt:
+ AlterPublication((AlterPublicationStmt *) parsetree);
+ /*
+ * AlterPublication calls EventTriggerCollectSimpleCommand
+ * directly
+ */
+ commandCollected = true;
+ break;
+
+ case T_CreateSubscriptionStmt:
+ address = CreateSubscription((CreateSubscriptionStmt *) parsetree);
+ break;
+
+ case T_AlterSubscriptionStmt:
+ address = AlterSubscription((AlterSubscriptionStmt *) parsetree);
+ break;
+
+ case T_DropSubscriptionStmt:
+ DropSubscription((DropSubscriptionStmt *) parsetree);
+ /* no commands stashed for DROP */
+ commandCollected = true;
+ break;
+
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(parsetree));
@@ -1941,6 +1975,12 @@ AlterObjectTypeCommandTag(ObjectType objtype)
case OBJECT_MATVIEW:
tag = "ALTER MATERIALIZED VIEW";
break;
+ case OBJECT_PUBLICATION:
+ tag = "ALTER PUBLICATION";
+ break;
+ case OBJECT_SUBSCRIPTION:
+ tag = "ALTER SUBSCRIPTION";
+ break;
default:
tag = "???";
break;
@@ -2232,6 +2272,9 @@ CreateCommandTag(Node *parsetree)
case OBJECT_ACCESS_METHOD:
tag = "DROP ACCESS METHOD";
break;
+ case OBJECT_PUBLICATION:
+ tag = "DROP PUBLICATION";
+ break;
default:
tag = "???";
}
@@ -2602,6 +2645,26 @@ CreateCommandTag(Node *parsetree)
tag = "CREATE ACCESS METHOD";
break;
+ case T_CreatePublicationStmt:
+ tag = "CREATE PUBLICATION";
+ break;
+
+ case T_AlterPublicationStmt:
+ tag = "ALTER PUBLICATION";
+ break;
+
+ case T_CreateSubscriptionStmt:
+ tag = "CREATE SUBSCRIPTION";
+ break;
+
+ case T_AlterSubscriptionStmt:
+ tag = "ALTER SUBSCRIPTION";
+ break;
+
+ case T_DropSubscriptionStmt:
+ tag = "DROP SUBSCRIPTION";
+ break;
+
case T_PrepareStmt:
tag = "PREPARE";
break;
@@ -3166,6 +3229,26 @@ GetCommandLogLevel(Node *parsetree)
lev = LOGSTMT_DDL;
break;
+ case T_CreatePublicationStmt:
+ lev = LOGSTMT_DDL;
+ break;
+
+ case T_AlterPublicationStmt:
+ lev = LOGSTMT_DDL;
+ break;
+
+ case T_CreateSubscriptionStmt:
+ lev = LOGSTMT_DDL;
+ break;
+
+ case T_AlterSubscriptionStmt:
+ lev = LOGSTMT_DDL;
+ break;
+
+ case T_DropSubscriptionStmt:
+ lev = LOGSTMT_DDL;
+ break;
+
/* already-planned queries */
case T_PlannedStmt:
{
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 1f50282803..11f9218f66 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -375,11 +375,16 @@ AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
{
SharedInvalidationMessage msg;
- /* Don't add a duplicate item */
- /* We assume dbId need not be checked because it will never change */
+ /*
+ * Don't add a duplicate item.
+ * We assume dbId need not be checked because it will never change.
+ * InvalidOid for relId means all relations, so we don't need to add
+ * individual ones when it is present.
+ */
ProcessMessageList(hdr->rclist,
if (msg->rc.id == SHAREDINVALRELCACHE_ID &&
- msg->rc.relId == relId)
+ (msg->rc.relId == relId ||
+ msg->rc.relId == InvalidOid))
return);
/* OK, add the item */
@@ -509,8 +514,10 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
/*
* If the relation being invalidated is one of those cached in the local
* relcache init file, mark that we need to zap that file at commit.
+ * The same is true when we are invalidating the whole relcache.
*/
- if (OidIsValid(dbId) && RelationIdIsInInitFile(relId))
+ if (OidIsValid(dbId) &&
+ (RelationIdIsInInitFile(relId) || relId == InvalidOid))
transInvalInfo->RelcacheInitFileInval = true;
}
@@ -565,7 +572,10 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
{
int i;
- RelationCacheInvalidateEntry(msg->rc.relId);
+ if (msg->rc.relId == InvalidOid)
+ RelationCacheInvalidate();
+ else
+ RelationCacheInvalidateEntry(msg->rc.relId);
for (i = 0; i < relcache_callback_count; i++)
{
@@ -1226,6 +1236,21 @@ CacheInvalidateRelcache(Relation relation)
RegisterRelcacheInvalidation(databaseId, relationId);
}
+/*
+ * CacheInvalidateRelcacheAll
+ * Register invalidation of the whole relcache at the end of command.
+ *
+ * This is used by ALTER PUBLICATION, as changes in publications may
+ * affect a large number of tables.
+ */
+void
+CacheInvalidateRelcacheAll(void)
+{
+ PrepareInvalidationState();
+
+ RegisterRelcacheInvalidation(InvalidOid, InvalidOid);
+}
+
/*
* CacheInvalidateRelcacheByTuple
* As above, but relation is identified by passing its pg_class tuple.
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 24678fcd48..26ff7e187a 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -53,8 +53,10 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_publication.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_shseclabel.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
@@ -103,6 +105,7 @@ static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_
static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members};
static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel};
+static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription};
/*
* Hash tables that index the relation cache
@@ -2336,7 +2339,10 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
list_free(relation->rd_indexlist);
bms_free(relation->rd_indexattr);
bms_free(relation->rd_keyattr);
+ bms_free(relation->rd_pkattr);
bms_free(relation->rd_idattr);
+ if (relation->rd_pubactions)
+ pfree(relation->rd_pubactions);
if (relation->rd_options)
pfree(relation->rd_options);
if (relation->rd_indextuple)
@@ -3043,6 +3049,7 @@ AtEOXact_cleanup(Relation relation, bool isCommit)
list_free(relation->rd_indexlist);
relation->rd_indexlist = NIL;
relation->rd_oidindex = InvalidOid;
+ relation->rd_pkindex = InvalidOid;
relation->rd_replidindex = InvalidOid;
relation->rd_indexvalid = 0;
}
@@ -3155,6 +3162,7 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
list_free(relation->rd_indexlist);
relation->rd_indexlist = NIL;
relation->rd_oidindex = InvalidOid;
+ relation->rd_pkindex = InvalidOid;
relation->rd_replidindex = InvalidOid;
relation->rd_indexvalid = 0;
}
@@ -3588,8 +3596,10 @@ RelationCacheInitializePhase2(void)
false, Natts_pg_auth_members, Desc_pg_auth_members);
formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true,
false, Natts_pg_shseclabel, Desc_pg_shseclabel);
+ formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true,
+ true, Natts_pg_subscription, Desc_pg_subscription);
-#define NUM_CRITICAL_SHARED_RELS 4 /* fix if you change list above */
+#define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */
}
MemoryContextSwitchTo(oldcxt);
@@ -4425,6 +4435,7 @@ RelationGetIndexList(Relation relation)
oldlist = relation->rd_indexlist;
relation->rd_indexlist = list_copy(result);
relation->rd_oidindex = oidIndex;
+ relation->rd_pkindex = pkeyIndex;
if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex))
relation->rd_replidindex = pkeyIndex;
else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex))
@@ -4492,7 +4503,7 @@ insert_ordered_oid(List *list, Oid datum)
* to ensure that a correct rd_indexattr set has been cached before first
* calling RelationSetIndexList; else a subsequent inquiry might cause a
* wrong rd_indexattr set to get computed and cached. Likewise, we do not
- * touch rd_keyattr or rd_idattr.
+ * touch rd_keyattr, rd_pkattr or rd_idattr.
*/
void
RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
@@ -4508,7 +4519,11 @@ RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex)
list_free(relation->rd_indexlist);
relation->rd_indexlist = indexIds;
relation->rd_oidindex = oidIndex;
- /* For the moment, assume the target rel hasn't got a replica index */
+ /*
+ * For the moment, assume the target rel doesn't have a primary key or
+ * replica index. We'll load them on demand in the APIs that wrap
+ * access to them.
+ */
+ relation->rd_pkindex = InvalidOid;
relation->rd_replidindex = InvalidOid;
relation->rd_indexvalid = 2; /* mark list as forced */
/* Flag relation as needing eoxact cleanup (to reset the list) */
@@ -4543,6 +4558,27 @@ RelationGetOidIndex(Relation relation)
return relation->rd_oidindex;
}
+/*
+ * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index
+ *
+ * Returns InvalidOid if there is no such index.
+ */
+Oid
+RelationGetPrimaryKeyIndex(Relation relation)
+{
+ List *ilist;
+
+ if (relation->rd_indexvalid == 0)
+ {
+ /* RelationGetIndexList does the heavy lifting. */
+ ilist = RelationGetIndexList(relation);
+ list_free(ilist);
+ Assert(relation->rd_indexvalid != 0);
+ }
+
+ return relation->rd_pkindex;
+}
+
/*
* RelationGetReplicaIndex -- get OID of the relation's replica identity index
*
@@ -4722,8 +4758,10 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
{
Bitmapset *indexattrs; /* indexed columns */
Bitmapset *uindexattrs; /* columns in unique indexes */
+ Bitmapset *pkindexattrs; /* columns in the primary index */
Bitmapset *idindexattrs; /* columns in the replica identity */
List *indexoidlist;
+ Oid relpkindex;
Oid relreplindex;
ListCell *l;
MemoryContext oldcxt;
@@ -4737,6 +4775,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return bms_copy(relation->rd_indexattr);
case INDEX_ATTR_BITMAP_KEY:
return bms_copy(relation->rd_keyattr);
+ case INDEX_ATTR_BITMAP_PRIMARY_KEY:
+ return bms_copy(relation->rd_pkattr);
case INDEX_ATTR_BITMAP_IDENTITY_KEY:
return bms_copy(relation->rd_idattr);
default:
@@ -4758,12 +4798,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return NULL;
/*
- * Copy the rd_replidindex value computed by RelationGetIndexList before
- * proceeding. This is needed because a relcache flush could occur inside
- * index_open below, resetting the fields managed by RelationGetIndexList.
- * (The values we're computing will still be valid, assuming that caller
- * has a sufficient lock on the relation.)
+ * Copy the rd_pkindex and rd_replidindex value computed by
+ * RelationGetIndexList before proceeding. This is needed because a
+ * relcache flush could occur inside index_open below, resetting the
+ * fields managed by RelationGetIndexList. (The values we're computing
+ * will still be valid, assuming that caller has a sufficient lock on
+ * the relation.)
*/
+ relpkindex = relation->rd_pkindex;
relreplindex = relation->rd_replidindex;
/*
@@ -4778,6 +4820,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
*/
indexattrs = NULL;
uindexattrs = NULL;
+ pkindexattrs = NULL;
idindexattrs = NULL;
foreach(l, indexoidlist)
{
@@ -4786,6 +4829,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
IndexInfo *indexInfo;
int i;
bool isKey; /* candidate key */
+ bool isPK; /* primary key */
bool isIDKey; /* replica identity index */
indexDesc = index_open(indexOid, AccessShareLock);
@@ -4798,6 +4842,9 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
indexInfo->ii_Expressions == NIL &&
indexInfo->ii_Predicate == NIL;
+ /* Is this a primary key? */
+ isPK = (indexOid == relpkindex);
+
/* Is this index the configured (or default) replica identity? */
isIDKey = (indexOid == relreplindex);
@@ -4815,6 +4862,10 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
uindexattrs = bms_add_member(uindexattrs,
attrnum - FirstLowInvalidHeapAttributeNumber);
+ if (isPK)
+ pkindexattrs = bms_add_member(pkindexattrs,
+ attrnum - FirstLowInvalidHeapAttributeNumber);
+
if (isIDKey)
idindexattrs = bms_add_member(idindexattrs,
attrnum - FirstLowInvalidHeapAttributeNumber);
@@ -4837,6 +4888,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
relation->rd_indexattr = NULL;
bms_free(relation->rd_keyattr);
relation->rd_keyattr = NULL;
+ bms_free(relation->rd_pkattr);
+ relation->rd_pkattr = NULL;
bms_free(relation->rd_idattr);
relation->rd_idattr = NULL;
@@ -4849,6 +4902,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
*/
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
relation->rd_keyattr = bms_copy(uindexattrs);
+ relation->rd_pkattr = bms_copy(pkindexattrs);
relation->rd_idattr = bms_copy(idindexattrs);
relation->rd_indexattr = bms_copy(indexattrs);
MemoryContextSwitchTo(oldcxt);
@@ -4860,6 +4914,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
return indexattrs;
case INDEX_ATTR_BITMAP_KEY:
return uindexattrs;
+ case INDEX_ATTR_BITMAP_PRIMARY_KEY:
+ return pkindexattrs;
case INDEX_ATTR_BITMAP_IDENTITY_KEY:
return idindexattrs;
default:
@@ -4992,6 +5048,67 @@ RelationGetExclusionInfo(Relation indexRelation,
MemoryContextSwitchTo(oldcxt);
}
+/*
+ * Get publication actions for the given relation.
+ */
+struct PublicationActions *
+GetRelationPublicationActions(Relation relation)
+{
+ List *puboids;
+ ListCell *lc;
+ MemoryContext oldcxt;
+ PublicationActions *pubactions = palloc0(sizeof(PublicationActions));
+
+ if (relation->rd_pubactions)
+ return memcpy(pubactions, relation->rd_pubactions,
+ sizeof(PublicationActions));
+
+ /* Fetch the publication membership info. */
+ puboids = GetRelationPublications(RelationGetRelid(relation));
+ puboids = list_concat_unique_oid(puboids, GetAllTablesPublications());
+
+ foreach(lc, puboids)
+ {
+ Oid pubid = lfirst_oid(lc);
+ HeapTuple tup;
+ Form_pg_publication pubform;
+
+ tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+ pubform = (Form_pg_publication) GETSTRUCT(tup);
+
+ pubactions->pubinsert |= pubform->pubinsert;
+ pubactions->pubupdate |= pubform->pubupdate;
+ pubactions->pubdelete |= pubform->pubdelete;
+
+ ReleaseSysCache(tup);
+
+ /*
+ * If we know everything is replicated, there is no point in checking
+ * the rest of the publications.
+ */
+ if (pubactions->pubinsert && pubactions->pubupdate &&
+ pubactions->pubdelete)
+ break;
+ }
+
+ if (relation->rd_pubactions)
+ {
+ pfree(relation->rd_pubactions);
+ relation->rd_pubactions = NULL;
+ }
+
+ /* Now save copy of the actions in the relcache entry. */
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ relation->rd_pubactions = palloc(sizeof(PublicationActions));
+ memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions));
+ MemoryContextSwitchTo(oldcxt);
+
+ return pubactions;
+}
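
A note on usage: GetRelationPublicationActions() ORs together the per-operation
flags of every publication the relation belongs to (including FOR ALL TABLES
publications), short-circuits once all three actions are known to be
replicated, and caches the result in the relcache entry. A minimal caller
sketch follows; operation_is_published() is hypothetical, the include paths
are indicative, and only GetRelationPublicationActions() and
PublicationActions come from this patch:

    #include "postgres.h"
    #include "catalog/pg_publication.h"
    #include "nodes/nodes.h"
    #include "utils/rel.h"
    #include "utils/relcache.h"

    /* Hypothetical helper: is the given DML operation on "rel" published? */
    static bool
    operation_is_published(Relation rel, CmdType operation)
    {
        PublicationActions *actions = GetRelationPublicationActions(rel);
        bool        result = false;

        switch (operation)
        {
            case CMD_INSERT:
                result = actions->pubinsert;
                break;
            case CMD_UPDATE:
                result = actions->pubupdate;
                break;
            case CMD_DELETE:
                result = actions->pubdelete;
                break;
            default:
                break;
        }

        pfree(actions);     /* the function hands back a palloc'd copy */
        return result;
    }
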
/*
* Routines to support ereport() reports of relation-related errors
@@ -5407,10 +5524,13 @@ load_relcache_init_file(bool shared)
rel->rd_fkeyvalid = false;
rel->rd_indexlist = NIL;
rel->rd_oidindex = InvalidOid;
+ rel->rd_pkindex = InvalidOid;
rel->rd_replidindex = InvalidOid;
rel->rd_indexattr = NULL;
rel->rd_keyattr = NULL;
+ rel->rd_pkattr = NULL;
rel->rd_idattr = NULL;
+ rel->rd_pubactions = NULL;
rel->rd_createSubid = InvalidSubTransactionId;
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
rel->rd_amcache = NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index d634a3b683..bdfaa0ce75 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -50,6 +50,8 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_publication.h"
+#include "catalog/pg_publication_rel.h"
#include "catalog/pg_range.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_seclabel.h"
@@ -59,6 +61,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
@@ -658,6 +661,50 @@ static const struct cachedesc cacheinfo[] = {
},
16
},
+ {PublicationRelationId, /* PUBLICATIONOID */
+ PublicationObjectIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0
+ },
+ 8
+ },
+ {PublicationRelationId, /* PUBLICATIONNAME */
+ PublicationNameIndexId,
+ 1,
+ {
+ Anum_pg_publication_pubname,
+ 0,
+ 0,
+ 0
+ },
+ 8
+ },
+ {PublicationRelRelationId, /* PUBLICATIONREL */
+ PublicationRelObjectIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0
+ },
+ 64
+ },
+ {PublicationRelRelationId, /* PUBLICATIONRELMAP */
+ PublicationRelMapIndexId,
+ 2,
+ {
+ Anum_pg_publication_rel_prrelid,
+ Anum_pg_publication_rel_prpubid,
+ 0,
+ 0
+ },
+ 64
+ },
{RewriteRelationId, /* RULERELNAME */
RewriteRelRulenameIndexId,
2,
@@ -691,6 +738,28 @@ static const struct cachedesc cacheinfo[] = {
},
128
},
+ {SubscriptionRelationId, /* SUBSCRIPTIONOID */
+ SubscriptionObjectIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0
+ },
+ 4
+ },
+ {SubscriptionRelationId, /* SUBSCRIPTIONNAME */
+ SubscriptionNameIndexId,
+ 2,
+ {
+ Anum_pg_subscription_subdbid,
+ Anum_pg_subscription_subname,
+ 0,
+ 0
+ },
+ 4
+ },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
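
Each cachedesc entry above supplies, in order: the catalog's relation OID, the
unique index used for lookups, the number of key columns, the four key
attribute numbers (zero-padded), and an initial hash bucket count. With the
PUBLICATIONNAME cache registered, a by-name lookup becomes the usual
one-liner; a sketch with error handling elided (pubname is a placeholder
variable, not part of this patch):

    #include "postgres.h"
    #include "access/htup_details.h"
    #include "catalog/pg_publication.h"
    #include "utils/syscache.h"

    /* Sketch: resolve a publication name to its pg_publication tuple. */
    HeapTuple   tup = SearchSysCache1(PUBLICATIONNAME,
                                      CStringGetDatum(pubname));

    if (HeapTupleIsValid(tup))
    {
        Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup);

        /* ... inspect pubform->puballtables, pubform->pubinsert, ... */
        ReleaseSysCache(tup);
    }
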
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 4e2bd4c496..15a09dfd4d 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -59,6 +59,7 @@
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
+#include "replication/logicallauncher.h"
#include "replication/slot.h"
#include "replication/syncrep.h"
#include "replication/walreceiver.h"
@@ -2471,6 +2472,18 @@ static struct config_int ConfigureNamesInt[] =
check_max_worker_processes, NULL, NULL
},
+ {
+ {"max_logical_replication_workers",
+ PGC_POSTMASTER,
+ RESOURCES_ASYNCHRONOUS,
+ gettext_noop("Maximum number of logical replication worker processes."),
+ NULL,
+ },
+ &max_logical_replication_workers,
+ 4, 0, MAX_BACKENDS,
+ NULL, NULL, NULL
+ },
+
{
{"log_rotation_age", PGC_SIGHUP, LOGGING_WHERE,
gettext_noop("Automatic log file rotation will occur after N minutes."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 15669b83c7..661b0fa9b6 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -163,6 +163,7 @@
#max_worker_processes = 8 # (change requires restart)
#max_parallel_workers_per_gather = 2 # taken from max_worker_processes
#max_parallel_workers = 8 # total maximum number of worker_processes
+#max_logical_replication_workers = 4 # taken from max_worker_processes
#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
# (change requires restart)
#backend_flush_after = 0 # measured in pages, 0 disables
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index d4e36421d2..89530a9f0f 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -292,6 +292,18 @@ getSchemaData(Archive *fout, int *numTablesPtr)
write_msg(NULL, "reading partition key information for interesting tables\n");
getTablePartitionKeyInfo(fout, tblinfo, numTables);
+ if (g_verbose)
+ write_msg(NULL, "reading publications\n");
+ getPublications(fout);
+
+ if (g_verbose)
+ write_msg(NULL, "reading publication membership\n");
+ getPublicationTables(fout, tblinfo, numTables);
+
+ if (g_verbose)
+ write_msg(NULL, "reading subscriptions\n");
+ getSubscriptions(fout);
+
*numTablesPtr = numTables;
return tblinfo;
}
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index 7241cdfc44..6480fb8e74 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -119,6 +119,7 @@ typedef struct _restoreOptions
bool *idWanted; /* array showing which dump IDs to emit */
int enable_row_security;
int sequence_data; /* dump sequence data even in schema-only mode */
+ int include_subscriptions;
} RestoreOptions;
typedef struct _dumpOptions
@@ -152,6 +153,8 @@ typedef struct _dumpOptions
int outputNoTablespaces;
int use_setsessauth;
int enable_row_security;
+ int include_subscriptions;
+ int no_create_subscription_slots;
/* default, if no "inclusion" switches appear, is to dump everything */
bool include_everything;
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index b89bd99e49..929f1b592b 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -172,6 +172,7 @@ dumpOptionsFromRestoreOptions(RestoreOptions *ropt)
dopt->include_everything = ropt->include_everything;
dopt->enable_row_security = ropt->enable_row_security;
dopt->sequence_data = ropt->sequence_data;
+ dopt->include_subscriptions = ropt->include_subscriptions;
return dopt;
}
@@ -3266,6 +3267,8 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH)
strcmp(type, "SCHEMA") == 0 ||
strcmp(type, "FOREIGN DATA WRAPPER") == 0 ||
strcmp(type, "SERVER") == 0 ||
+ strcmp(type, "PUBLICATION") == 0 ||
+ strcmp(type, "SUBSCRIPTION") == 0 ||
strcmp(type, "USER MAPPING") == 0)
{
/* We already know that search_path was set properly */
@@ -3476,7 +3479,9 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
strcmp(te->desc, "TEXT SEARCH DICTIONARY") == 0 ||
strcmp(te->desc, "TEXT SEARCH CONFIGURATION") == 0 ||
strcmp(te->desc, "FOREIGN DATA WRAPPER") == 0 ||
- strcmp(te->desc, "SERVER") == 0)
+ strcmp(te->desc, "SERVER") == 0 ||
+ strcmp(te->desc, "PUBLICATION") == 0 ||
+ strcmp(te->desc, "SUBSCRIPTION") == 0)
{
PQExpBuffer temp = createPQExpBuffer();
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 883fde1e5a..0bb363957a 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -246,6 +246,9 @@ static void getBlobs(Archive *fout);
static void dumpBlob(Archive *fout, BlobInfo *binfo);
static int dumpBlobs(Archive *fout, void *arg);
static void dumpPolicy(Archive *fout, PolicyInfo *polinfo);
+static void dumpPublication(Archive *fout, PublicationInfo *pubinfo);
+static void dumpPublicationTable(Archive *fout, PublicationRelInfo *pubrinfo);
+static void dumpSubscription(Archive *fout, SubscriptionInfo *subinfo);
static void dumpDatabase(Archive *AH);
static void dumpEncoding(Archive *AH);
static void dumpStdStrings(Archive *AH);
@@ -338,6 +341,7 @@ main(int argc, char **argv)
{"enable-row-security", no_argument, &dopt.enable_row_security, 1},
{"exclude-table-data", required_argument, NULL, 4},
{"if-exists", no_argument, &dopt.if_exists, 1},
+ {"include-subscriptions", no_argument, &dopt.include_subscriptions, 1},
{"inserts", no_argument, &dopt.dump_inserts, 1},
{"lock-wait-timeout", required_argument, NULL, 2},
{"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1},
@@ -348,6 +352,7 @@ main(int argc, char **argv)
{"snapshot", required_argument, NULL, 6},
{"strict-names", no_argument, &strict_names, 1},
{"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1},
+ {"no-create-subscription-slots", no_argument, &dopt.no_create_subscription_slots, 1},
{"no-security-labels", no_argument, &dopt.no_security_labels, 1},
{"no-synchronized-snapshots", no_argument, &dopt.no_synchronized_snapshots, 1},
{"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1},
@@ -849,6 +854,7 @@ main(int argc, char **argv)
ropt->include_everything = dopt.include_everything;
ropt->enable_row_security = dopt.enable_row_security;
ropt->sequence_data = dopt.sequence_data;
+ ropt->include_subscriptions = dopt.include_subscriptions;
if (compressLevel == -1)
ropt->compression = 0;
@@ -929,7 +935,10 @@ help(const char *progname)
" access to)\n"));
printf(_(" --exclude-table-data=TABLE do NOT dump data for the named table(s)\n"));
printf(_(" --if-exists use IF EXISTS when dropping objects\n"));
+ printf(_(" --include-subscriptions dump logical replication subscriptions\n"));
printf(_(" --inserts dump data as INSERT commands, rather than COPY\n"));
+ printf(_(" --no-create-subscription-slots\n"
+ " do not create replication slots for subscriptions\n"));
printf(_(" --no-security-labels do not dump security label assignments\n"));
printf(_(" --no-synchronized-snapshots do not use synchronized snapshots in parallel jobs\n"));
printf(_(" --no-tablespaces do not dump tablespace assignments\n"));
@@ -3311,6 +3320,449 @@ dumpPolicy(Archive *fout, PolicyInfo *polinfo)
destroyPQExpBuffer(delqry);
}
+/*
+ * getPublications
+ * get information about publications
+ */
+void
+getPublications(Archive *fout)
+{
+ PQExpBuffer query;
+ PGresult *res;
+ PublicationInfo *pubinfo;
+ int i_tableoid;
+ int i_oid;
+ int i_pubname;
+ int i_rolname;
+ int i_puballtables;
+ int i_pubinsert;
+ int i_pubupdate;
+ int i_pubdelete;
+ int i,
+ ntups;
+
+ if (fout->remoteVersion < 100000)
+ return;
+
+ query = createPQExpBuffer();
+
+ resetPQExpBuffer(query);
+
+ /* Get the publications. */
+ appendPQExpBuffer(query,
+ "SELECT p.tableoid, p.oid, p.pubname, "
+ "(%s p.pubowner) AS rolname, "
+ "p.puballtables, p.pubinsert, p.pubupdate, p.pubdelete "
+ "FROM pg_catalog.pg_publication p",
+ username_subquery);
+
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ ntups = PQntuples(res);
+
+ if (ntups == 0)
+ {
+ /*
+ * There are no publications defined. Clean up and return.
+ */
+ PQclear(res);
+ return;
+ }
+
+ i_tableoid = PQfnumber(res, "tableoid");
+ i_oid = PQfnumber(res, "oid");
+ i_pubname = PQfnumber(res, "pubname");
+ i_rolname = PQfnumber(res, "rolname");
+ i_puballtables = PQfnumber(res, "puballtables");
+ i_pubinsert = PQfnumber(res, "pubinsert");
+ i_pubupdate = PQfnumber(res, "pubupdate");
+ i_pubdelete = PQfnumber(res, "pubdelete");
+
+ pubinfo = pg_malloc(ntups * sizeof(PublicationInfo));
+
+ for (i = 0; i < ntups; i++)
+ {
+ pubinfo[i].dobj.objType = DO_PUBLICATION;
+ pubinfo[i].dobj.catId.tableoid =
+ atooid(PQgetvalue(res, i, i_tableoid));
+ pubinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
+ AssignDumpId(&pubinfo[i].dobj);
+ pubinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_pubname));
+ pubinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
+ pubinfo[i].puballtables =
+ (strcmp(PQgetvalue(res, i, i_puballtables), "t") == 0);
+ pubinfo[i].pubinsert =
+ (strcmp(PQgetvalue(res, i, i_pubinsert), "t") == 0);
+ pubinfo[i].pubupdate =
+ (strcmp(PQgetvalue(res, i, i_pubupdate), "t") == 0);
+ pubinfo[i].pubdelete =
+ (strcmp(PQgetvalue(res, i, i_pubdelete), "t") == 0);
+
+ if (strlen(pubinfo[i].rolname) == 0)
+ write_msg(NULL, "WARNING: owner of publication \"%s\" appears to be invalid\n",
+ pubinfo[i].dobj.name);
+ }
+ PQclear(res);
+
+ destroyPQExpBuffer(query);
+}
+
+/*
+ * dumpPublication
+ * dump the definition of the given publication
+ */
+static void
+dumpPublication(Archive *fout, PublicationInfo *pubinfo)
+{
+ DumpOptions *dopt = fout->dopt;
+ PQExpBuffer delq;
+ PQExpBuffer query;
+
+ if (dopt->dataOnly)
+ return;
+
+ delq = createPQExpBuffer();
+ query = createPQExpBuffer();
+
+ appendPQExpBuffer(delq, "DROP PUBLICATION %s;\n",
+ fmtId(pubinfo->dobj.name));
+
+ appendPQExpBuffer(query, "CREATE PUBLICATION %s",
+ fmtId(pubinfo->dobj.name));
+
+ if (pubinfo->puballtables)
+ appendPQExpBufferStr(query, " FOR ALL TABLES");
+
+ appendPQExpBufferStr(query, " WITH (");
+ if (pubinfo->pubinsert)
+ appendPQExpBufferStr(query, "PUBLISH INSERT");
+ else
+ appendPQExpBufferStr(query, "NOPUBLISH INSERT");
+
+ if (pubinfo->pubupdate)
+ appendPQExpBufferStr(query, ", PUBLISH UPDATE");
+ else
+ appendPQExpBufferStr(query, ", NOPUBLISH UPDATE");
+
+ if (pubinfo->pubdelete)
+ appendPQExpBufferStr(query, ", PUBLISH DELETE");
+ else
+ appendPQExpBufferStr(query, ", NOPUBLISH DELETE");
+
+ appendPQExpBufferStr(query, ");\n");
+
+ ArchiveEntry(fout, pubinfo->dobj.catId, pubinfo->dobj.dumpId,
+ pubinfo->dobj.name,
+ NULL,
+ NULL,
+ pubinfo->rolname, false,
+ "PUBLICATION", SECTION_POST_DATA,
+ query->data, delq->data, NULL,
+ NULL, 0,
+ NULL, NULL);
+
+ destroyPQExpBuffer(delq);
+ destroyPQExpBuffer(query);
+}
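
For a publication created with default options, the buffer assembled above
yields exactly the statement the new regression test below checks for, with
the matching drop command in the delete entry:

    CREATE PUBLICATION pub1 WITH (PUBLISH INSERT, PUBLISH UPDATE, PUBLISH DELETE);
    DROP PUBLICATION pub1;
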
+
+/*
+ * getPublicationTables
+ * get information about publication membership for dumpable tables.
+ */
+void
+getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables)
+{
+ PQExpBuffer query;
+ PGresult *res;
+ PublicationRelInfo *pubrinfo;
+ int i_tableoid;
+ int i_oid;
+ int i_pubname;
+ int i,
+ j,
+ ntups;
+
+ if (fout->remoteVersion < 100000)
+ return;
+
+ query = createPQExpBuffer();
+
+ for (i = 0; i < numTables; i++)
+ {
+ TableInfo *tbinfo = &tblinfo[i];
+
+		/* Only plain tables can be added to publications. */
+ if (tbinfo->relkind != RELKIND_RELATION)
+ continue;
+
+ /*
+ * Ignore publication membership of tables whose definitions are
+ * not to be dumped.
+ */
+ if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION))
+ continue;
+
+ if (g_verbose)
+ write_msg(NULL, "reading publication membership for table \"%s.%s\"\n",
+ tbinfo->dobj.namespace->dobj.name,
+ tbinfo->dobj.name);
+
+ resetPQExpBuffer(query);
+
+		/* Get the publication membership for the table. */
+ appendPQExpBuffer(query,
+ "SELECT pr.tableoid, pr.oid, p.pubname "
+ "FROM pg_catalog.pg_publication_rel pr,"
+ " pg_catalog.pg_publication p "
+ "WHERE pr.prrelid = '%u'"
+ " AND p.oid = pr.prpubid",
+ tbinfo->dobj.catId.oid);
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ ntups = PQntuples(res);
+
+ if (ntups == 0)
+ {
+			/*
+			 * Table is not a member of any publication. Clean up and
+			 * continue with the next table.
+			 */
+ PQclear(res);
+ continue;
+ }
+
+ i_tableoid = PQfnumber(res, "tableoid");
+ i_oid = PQfnumber(res, "oid");
+ i_pubname = PQfnumber(res, "pubname");
+
+ pubrinfo = pg_malloc(ntups * sizeof(PublicationRelInfo));
+
+ for (j = 0; j < ntups; j++)
+ {
+ pubrinfo[j].dobj.objType = DO_PUBLICATION_REL;
+ pubrinfo[j].dobj.catId.tableoid =
+ atooid(PQgetvalue(res, j, i_tableoid));
+ pubrinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid));
+ AssignDumpId(&pubrinfo[j].dobj);
+ pubrinfo[j].dobj.namespace = tbinfo->dobj.namespace;
+ pubrinfo[j].pubname = pg_strdup(PQgetvalue(res, j, i_pubname));
+ pubrinfo[j].pubtable = tbinfo;
+ }
+ PQclear(res);
+ }
+ destroyPQExpBuffer(query);
+}
+
+/*
+ * dumpPublicationTable
+ * dump the definition of the given publication table mapping
+ */
+static void
+dumpPublicationTable(Archive *fout, PublicationRelInfo *pubrinfo)
+{
+ DumpOptions *dopt = fout->dopt;
+ TableInfo *tbinfo = pubrinfo->pubtable;
+ PQExpBuffer query;
+ char *tag;
+
+ if (dopt->dataOnly)
+ return;
+
+ tag = psprintf("%s %s", pubrinfo->pubname, tbinfo->dobj.name);
+
+ query = createPQExpBuffer();
+
+ appendPQExpBuffer(query, "ALTER PUBLICATION %s ADD TABLE",
+ fmtId(pubrinfo->pubname));
+ appendPQExpBuffer(query, " %s;",
+ fmtId(tbinfo->dobj.name));
+
+	/*
+	 * There is no point in creating a drop query, since the mapping is
+	 * removed when the table itself is dropped.
+	 */
+ ArchiveEntry(fout, pubrinfo->dobj.catId, pubrinfo->dobj.dumpId,
+ tag,
+ tbinfo->dobj.namespace->dobj.name,
+ NULL,
+ "", false,
+ "PUBLICATION TABLE", SECTION_POST_DATA,
+ query->data, "", NULL,
+ NULL, 0,
+ NULL, NULL);
+
+ free(tag);
+ destroyPQExpBuffer(query);
+}
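
The archive entry is a single ALTER statement; for the membership the test
suite creates below it reads:

    ALTER PUBLICATION pub1 ADD TABLE test_table;
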
+
+
+/*
+ * getSubscriptions
+ * get information about subscriptions
+ */
+void
+getSubscriptions(Archive *fout)
+{
+ DumpOptions *dopt = fout->dopt;
+ PQExpBuffer query;
+ PGresult *res;
+ SubscriptionInfo *subinfo;
+ int i_tableoid;
+ int i_oid;
+ int i_subname;
+ int i_rolname;
+ int i_subenabled;
+ int i_subconninfo;
+ int i_subslotname;
+ int i_subpublications;
+ int i,
+ ntups;
+
+ if (!dopt->include_subscriptions || fout->remoteVersion < 100000)
+ return;
+
+ query = createPQExpBuffer();
+
+ resetPQExpBuffer(query);
+
+ /* Get the subscriptions in current database. */
+ appendPQExpBuffer(query,
+ "SELECT s.tableoid, s.oid, s.subname,"
+ "(%s s.subowner) AS rolname, s.subenabled, "
+ " s.subconninfo, s.subslotname, s.subpublications "
+ "FROM pg_catalog.pg_subscription s "
+ "WHERE s.subdbid = (SELECT oid FROM pg_catalog.pg_database"
+ " WHERE datname = current_database())",
+ username_subquery);
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ ntups = PQntuples(res);
+
+ if (ntups == 0)
+ {
+ /*
+ * There are no subscriptions defined. Clean up and return.
+ */
+ PQclear(res);
+ return;
+ }
+
+ i_tableoid = PQfnumber(res, "tableoid");
+ i_oid = PQfnumber(res, "oid");
+ i_subname = PQfnumber(res, "subname");
+ i_rolname = PQfnumber(res, "rolname");
+ i_subenabled = PQfnumber(res, "subenabled");
+ i_subconninfo = PQfnumber(res, "subconninfo");
+ i_subslotname = PQfnumber(res, "subslotname");
+ i_subpublications = PQfnumber(res, "subpublications");
+
+ subinfo = pg_malloc(ntups * sizeof(SubscriptionInfo));
+
+ for (i = 0; i < ntups; i++)
+ {
+ subinfo[i].dobj.objType = DO_SUBSCRIPTION;
+ subinfo[i].dobj.catId.tableoid =
+ atooid(PQgetvalue(res, i, i_tableoid));
+ subinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid));
+ AssignDumpId(&subinfo[i].dobj);
+ subinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_subname));
+ subinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname));
+ subinfo[i].subenabled =
+ (strcmp(PQgetvalue(res, i, i_subenabled), "t") == 0);
+ subinfo[i].subconninfo = pg_strdup(PQgetvalue(res, i, i_subconninfo));
+ subinfo[i].subslotname = pg_strdup(PQgetvalue(res, i, i_subslotname));
+ subinfo[i].subpublications =
+ pg_strdup(PQgetvalue(res, i, i_subpublications));
+
+ if (strlen(subinfo[i].rolname) == 0)
+ write_msg(NULL, "WARNING: owner of subscription \"%s\" appears to be invalid\n",
+ subinfo[i].dobj.name);
+ }
+ PQclear(res);
+
+ destroyPQExpBuffer(query);
+}
+
+/*
+ * dumpSubscription
+ * dump the definition of the given subscription
+ */
+static void
+dumpSubscription(Archive *fout, SubscriptionInfo *subinfo)
+{
+ DumpOptions *dopt = fout->dopt;
+ PQExpBuffer delq;
+ PQExpBuffer query;
+ PQExpBuffer publications;
+ char **pubnames = NULL;
+ int npubnames = 0;
+ int i;
+
+ if (dopt->dataOnly)
+ return;
+
+ delq = createPQExpBuffer();
+ query = createPQExpBuffer();
+
+ appendPQExpBuffer(delq, "DROP SUBSCRIPTION %s;\n",
+ fmtId(subinfo->dobj.name));
+
+ appendPQExpBuffer(query, "CREATE SUBSCRIPTION %s CONNECTION ",
+ fmtId(subinfo->dobj.name));
+ appendStringLiteralAH(query, subinfo->subconninfo, fout);
+
+ /* Build list of quoted publications and append them to query. */
+ if (!parsePGArray(subinfo->subpublications, &pubnames, &npubnames))
+ {
+ write_msg(NULL,
+ "WARNING: could not parse subpublications array\n");
+ if (pubnames)
+ free(pubnames);
+ pubnames = NULL;
+ npubnames = 0;
+ }
+
+ publications = createPQExpBuffer();
+ for (i = 0; i < npubnames; i++)
+ {
+ if (i > 0)
+ appendPQExpBufferStr(publications, ", ");
+
+ appendPQExpBufferStr(publications, fmtId(pubnames[i]));
+ }
+
+ appendPQExpBuffer(query, " PUBLICATION %s WITH (", publications->data);
+
+ if (subinfo->subenabled)
+ appendPQExpBufferStr(query, "ENABLED");
+ else
+ appendPQExpBufferStr(query, "DISABLED");
+
+ appendPQExpBufferStr(query, ", SLOT NAME = ");
+ appendStringLiteralAH(query, subinfo->subslotname, fout);
+
+ if (dopt->no_create_subscription_slots)
+ appendPQExpBufferStr(query, ", NOCREATE SLOT");
+
+ appendPQExpBufferStr(query, ");\n");
+
+ ArchiveEntry(fout, subinfo->dobj.catId, subinfo->dobj.dumpId,
+ subinfo->dobj.name,
+ NULL,
+ NULL,
+ subinfo->rolname, false,
+ "SUBSCRIPTION", SECTION_POST_DATA,
+ query->data, delq->data, NULL,
+ NULL, 0,
+ NULL, NULL);
+
+ destroyPQExpBuffer(publications);
+ if (pubnames)
+ free(pubnames);
+
+ destroyPQExpBuffer(delq);
+ destroyPQExpBuffer(query);
+}
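
Assembled end to end, an enabled subscription therefore dumps along these
lines (names and connection string are placeholders; NOCREATE SLOT is
appended only under --no-create-subscription-slots):

    CREATE SUBSCRIPTION sub1 CONNECTION 'host=publisher dbname=src' PUBLICATION pub1 WITH (ENABLED, SLOT NAME = 'sub1');
    DROP SUBSCRIPTION sub1;
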
+
static void
binary_upgrade_set_type_oids_by_type_oid(Archive *fout,
PQExpBuffer upgrade_buffer,
@@ -8752,6 +9204,15 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
case DO_POLICY:
dumpPolicy(fout, (PolicyInfo *) dobj);
break;
+ case DO_PUBLICATION:
+ dumpPublication(fout, (PublicationInfo *) dobj);
+ break;
+ case DO_PUBLICATION_REL:
+ dumpPublicationTable(fout, (PublicationRelInfo *) dobj);
+ break;
+ case DO_SUBSCRIPTION:
+ dumpSubscription(fout, (SubscriptionInfo *) dobj);
+ break;
case DO_PRE_DATA_BOUNDARY:
case DO_POST_DATA_BOUNDARY:
/* never dumped, nothing to do */
@@ -16627,6 +17088,9 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
case DO_EVENT_TRIGGER:
case DO_DEFAULT_ACL:
case DO_POLICY:
+ case DO_PUBLICATION:
+ case DO_PUBLICATION_REL:
+ case DO_SUBSCRIPTION:
/* Post-data objects: must come after the post-data boundary */
addObjectDependency(dobj, postDataBound->dumpId);
break;
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 0c920a3907..77de22fcb8 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -79,7 +79,10 @@ typedef enum
DO_POST_DATA_BOUNDARY,
DO_EVENT_TRIGGER,
DO_REFRESH_MATVIEW,
- DO_POLICY
+ DO_POLICY,
+ DO_PUBLICATION,
+ DO_PUBLICATION_REL,
+ DO_SUBSCRIPTION
} DumpableObjectType;
/* component types of an object which can be selected for dumping */
@@ -566,6 +569,43 @@ typedef struct _policyInfo
char *polwithcheck;
} PolicyInfo;
+/*
+ * The PublicationInfo struct is used to represent publications.
+ */
+typedef struct _PublicationInfo
+{
+ DumpableObject dobj;
+ char *rolname;
+ bool puballtables;
+ bool pubinsert;
+ bool pubupdate;
+ bool pubdelete;
+} PublicationInfo;
+
+/*
+ * The PublicationRelInfo struct is used to represent a publication-to-table
+ * mapping.
+ */
+typedef struct _PublicationRelInfo
+{
+ DumpableObject dobj;
+ TableInfo *pubtable;
+ char *pubname;
+} PublicationRelInfo;
+
+/*
+ * The SubscriptionInfo struct is used to represent a subscription.
+ */
+typedef struct _SubscriptionInfo
+{
+ DumpableObject dobj;
+ char *rolname;
+ bool subenabled;
+ char *subconninfo;
+ char *subslotname;
+ char *subpublications;
+} SubscriptionInfo;
+
/*
* We build an array of these with an entry for each object that is an
* extension member according to pg_depend.
@@ -663,5 +703,9 @@ extern void processExtensionTables(Archive *fout, ExtensionInfo extinfo[],
extern EventTriggerInfo *getEventTriggers(Archive *fout, int *numEventTriggers);
extern void getPolicies(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getTablePartitionKeyInfo(Archive *fout, TableInfo *tblinfo, int numTables);
+extern void getPublications(Archive *fout);
+extern void getPublicationTables(Archive *fout, TableInfo tblinfo[],
+ int numTables);
+extern void getSubscriptions(Archive *fout);
#endif /* PG_DUMP_H */
diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c
index 1db680b950..ea643397ba 100644
--- a/src/bin/pg_dump/pg_dump_sort.c
+++ b/src/bin/pg_dump/pg_dump_sort.c
@@ -71,7 +71,10 @@ static const int dbObjectTypePriority[] =
26, /* DO_POST_DATA_BOUNDARY */
33, /* DO_EVENT_TRIGGER */
34, /* DO_REFRESH_MATVIEW */
- 35 /* DO_POLICY */
+ 35, /* DO_POLICY */
+ 36, /* DO_PUBLICATION */
+ 37, /* DO_PUBLICATION_REL */
+ 38 /* DO_SUBSCRIPTION */
};
static DumpId preDataBoundId;
@@ -1397,6 +1400,21 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
"POLICY (ID %d OID %u)",
obj->dumpId, obj->catId.oid);
return;
+ case DO_PUBLICATION:
+ snprintf(buf, bufsize,
+ "PUBLICATION (ID %d OID %u)",
+ obj->dumpId, obj->catId.oid);
+ return;
+ case DO_PUBLICATION_REL:
+ snprintf(buf, bufsize,
+ "PUBLICATION TABLE (ID %d OID %u)",
+ obj->dumpId, obj->catId.oid);
+ return;
+ case DO_SUBSCRIPTION:
+ snprintf(buf, bufsize,
+ "SUBSCRIPTION (ID %d OID %u)",
+ obj->dumpId, obj->catId.oid);
+ return;
case DO_PRE_DATA_BOUNDARY:
snprintf(buf, bufsize,
"PRE-DATA BOUNDARY (ID %d)",
diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c
index 239b0d8ac0..497677494b 100644
--- a/src/bin/pg_dump/pg_restore.c
+++ b/src/bin/pg_dump/pg_restore.c
@@ -72,6 +72,7 @@ main(int argc, char **argv)
char *inputFileSpec;
static int disable_triggers = 0;
static int enable_row_security = 0;
+ static int include_subscriptions = 0;
static int if_exists = 0;
static int no_data_for_failed_tables = 0;
static int outputNoTablespaces = 0;
@@ -116,6 +117,7 @@ main(int argc, char **argv)
{"disable-triggers", no_argument, &disable_triggers, 1},
{"enable-row-security", no_argument, &enable_row_security, 1},
{"if-exists", no_argument, &if_exists, 1},
+ {"include-subscriptions", no_argument, &include_subscriptions, 1},
{"no-data-for-failed-tables", no_argument, &no_data_for_failed_tables, 1},
{"no-tablespaces", no_argument, &outputNoTablespaces, 1},
{"role", required_argument, NULL, 2},
@@ -356,6 +358,7 @@ main(int argc, char **argv)
opts->disable_triggers = disable_triggers;
opts->enable_row_security = enable_row_security;
+ opts->include_subscriptions = include_subscriptions;
opts->noDataForFailedTables = no_data_for_failed_tables;
opts->noTablespace = outputNoTablespaces;
opts->use_setsessauth = use_setsessauth;
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index b732627c3a..488eec30f5 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -301,7 +301,7 @@ my %tests = (
'ALTER FUNCTION dump_test.pltestlang_call_handler() OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^
\QALTER FUNCTION dump_test.pltestlang_call_handler() \E
\QOWNER TO \E
@@ -358,7 +358,7 @@ my %tests = (
'ALTER PROCEDURAL LANGUAGE pltestlang OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^ALTER PROCEDURAL LANGUAGE pltestlang OWNER TO .*;/m,
like => {
binary_upgrade => 1,
@@ -382,7 +382,7 @@ my %tests = (
'ALTER SCHEMA dump_test OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^ALTER SCHEMA dump_test OWNER TO .*;/m,
like => {
binary_upgrade => 1,
@@ -406,7 +406,7 @@ my %tests = (
'ALTER SCHEMA dump_test_second_schema OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^ALTER SCHEMA dump_test_second_schema OWNER TO .*;/m,
like => {
binary_upgrade => 1,
@@ -524,7 +524,7 @@ my %tests = (
'ALTER TABLE test_table OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^ALTER TABLE test_table OWNER TO .*;/m,
like => {
binary_upgrade => 1,
@@ -577,7 +577,7 @@ my %tests = (
'ALTER TABLE test_second_table OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^ALTER TABLE test_second_table OWNER TO .*;/m,
like => {
binary_upgrade => 1,
@@ -601,7 +601,7 @@ my %tests = (
'ALTER TABLE test_third_table OWNER TO' => {
all_runs => 1,
- catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)',
+ catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)',
regexp => qr/^ALTER TABLE test_third_table OWNER TO .*;/m,
like => {
binary_upgrade => 1,
@@ -623,10 +623,10 @@ my %tests = (
only_dump_test_table => 1,
test_schema_plus_blobs => 1, }, },
- # catch-all for ALTER ... OWNER (except LARGE OBJECTs)
- 'ALTER ... OWNER commands (except LARGE OBJECTs)' => {
+ # catch-all for ALTER ... OWNER (except LARGE OBJECTs and PUBLICATIONs)
+ 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)' => {
all_runs => 0, # catch-all
- regexp => qr/^ALTER (?!LARGE OBJECT)(.*) OWNER TO .*;/m,
+ regexp => qr/^ALTER (?!LARGE OBJECT|PUBLICATION)(.*) OWNER TO .*;/m,
like => {}, # use more-specific options above
unlike => {
column_inserts => 1,
@@ -2217,6 +2217,62 @@ my %tests = (
pg_dumpall_globals_clean => 1,
role => 1,
section_pre_data => 1, }, },
+
+ 'CREATE PUBLICATION pub1' => {
+ create_order => 50,
+ create_sql => 'CREATE PUBLICATION pub1;',
+ regexp => qr/^
+ \QCREATE PUBLICATION pub1 WITH (PUBLISH INSERT, PUBLISH UPDATE, PUBLISH DELETE);\E
+ /xm,
+ like => {
+ binary_upgrade => 1,
+ clean => 1,
+ clean_if_exists => 1,
+ createdb => 1,
+ defaults => 1,
+ exclude_test_table_data => 1,
+ exclude_dump_test_schema => 1,
+ exclude_test_table => 1,
+ no_privs => 1,
+ no_owner => 1,
+ only_dump_test_schema => 1,
+ only_dump_test_table => 1,
+ pg_dumpall_dbprivs => 1,
+ schema_only => 1,
+ section_post_data => 1,
+ test_schema_plus_blobs => 1, },
+ unlike => {
+ section_pre_data => 1,
+ pg_dumpall_globals => 1,
+ pg_dumpall_globals_clean => 1, }, },
+ 'ALTER PUBLICATION pub1 ADD TABLE test_table' => {
+ create_order => 51,
+ create_sql => 'ALTER PUBLICATION pub1 ADD TABLE dump_test.test_table;',
+ regexp => qr/^
+ \QALTER PUBLICATION pub1 ADD TABLE test_table;\E
+ /xm,
+ like => {
+ binary_upgrade => 1,
+ clean => 1,
+ clean_if_exists => 1,
+ createdb => 1,
+ defaults => 1,
+ exclude_test_table_data => 1,
+ no_privs => 1,
+ no_owner => 1,
+ only_dump_test_schema => 1,
+ only_dump_test_table => 1,
+ pg_dumpall_dbprivs => 1,
+ schema_only => 1,
+ section_post_data => 1,
+ test_schema_plus_blobs => 1, },
+ unlike => {
+ section_pre_data => 1,
+ exclude_dump_test_schema => 1,
+ exclude_test_table => 1,
+ pg_dumpall_globals => 1,
+ pg_dumpall_globals_clean => 1, }, },
+
'CREATE SCHEMA dump_test' => {
all_runs => 1,
catch_all => 'CREATE ... commands',
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index 4139b7763f..0c164a339c 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -501,6 +501,22 @@ exec_command(const char *cmd,
else
success = PSQL_CMD_UNKNOWN;
break;
+ case 'R':
+ switch (cmd[2])
+ {
+ case 'p':
+ if (show_verbose)
+ success = describePublications(pattern);
+ else
+ success = listPublications(pattern);
+ break;
+ case 's':
+ success = describeSubscriptions(pattern, show_verbose);
+ break;
+ default:
+ status = PSQL_CMD_UNKNOWN;
+ }
+ break;
case 'u':
success = describeRoles(pattern, show_verbose, show_system);
break;
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index ce198779f4..c501168d8c 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2387,6 +2387,38 @@ describeOneTableDetails(const char *schemaname,
}
PQclear(result);
}
+
+ /* print any publications */
+ if (pset.sversion >= 100000)
+ {
+ printfPQExpBuffer(&buf,
+ "SELECT pub.pubname\n"
+ " FROM pg_catalog.pg_publication pub\n"
+ " LEFT JOIN pg_publication_rel pr\n"
+ " ON (pr.prpubid = pub.oid)\n"
+ "WHERE pr.prrelid = '%s' OR pub.puballtables\n"
+ "ORDER BY 1;",
+ oid);
+
+ result = PSQLexec(buf.data);
+ if (!result)
+ goto error_return;
+ else
+ tuples = PQntuples(result);
+
+ if (tuples > 0)
+ printTableAddFooter(&cont, _("Publications:"));
+
+ /* Might be an empty set - that's ok */
+ for (i = 0; i < tuples; i++)
+ {
+ printfPQExpBuffer(&buf, " \"%s\"",
+ PQgetvalue(result, i, 0));
+
+ printTableAddFooter(&cont, buf.data);
+ }
+ PQclear(result);
+ }
}
if (view_def)
@@ -4846,6 +4878,266 @@ listOneExtensionContents(const char *extname, const char *oid)
return true;
}
+/* \dRp
+ * Lists publications.
+ *
+ * Takes an optional regexp to select particular publications
+ */
+bool
+listPublications(const char *pattern)
+{
+ PQExpBufferData buf;
+ PGresult *res;
+ printQueryOpt myopt = pset.popt;
+ static const bool translate_columns[] = {false, false, false, false, false};
+
+ if (pset.sversion < 100000)
+ {
+ char sverbuf[32];
+ psql_error("The server (version %s) does not support publications.\n",
+ formatPGVersionNumber(pset.sversion, false,
+ sverbuf, sizeof(sverbuf)));
+ return true;
+ }
+
+ initPQExpBuffer(&buf);
+
+ printfPQExpBuffer(&buf,
+ "SELECT pubname AS \"%s\",\n"
+ " pg_catalog.pg_get_userbyid(pubowner) AS \"%s\",\n"
+ " pubinsert AS \"%s\",\n"
+ " pubupdate AS \"%s\",\n"
+ " pubdelete AS \"%s\"\n",
+ gettext_noop("Name"),
+ gettext_noop("Owner"),
+ gettext_noop("Inserts"),
+ gettext_noop("Updates"),
+ gettext_noop("Deletes"));
+
+ appendPQExpBufferStr(&buf,
+ "\nFROM pg_catalog.pg_publication\n");
+
+ processSQLNamePattern(pset.db, &buf, pattern, false, false,
+ NULL, "pubname", NULL,
+ NULL);
+
+ appendPQExpBufferStr(&buf, "ORDER BY 1;");
+
+ res = PSQLexec(buf.data);
+ termPQExpBuffer(&buf);
+ if (!res)
+ return false;
+
+ myopt.nullPrint = NULL;
+ myopt.title = _("List of publications");
+ myopt.translate_header = true;
+ myopt.translate_columns = translate_columns;
+ myopt.n_translate_columns = lengthof(translate_columns);
+
+ printQuery(res, &myopt, pset.queryFout, false, pset.logfile);
+
+ PQclear(res);
+
+ return true;
+}
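
Against the pub1 example used elsewhere in this patch, \dRp would print
something like the following (layout and owner name illustrative):

    List of publications
     Name |  Owner   | Inserts | Updates | Deletes
    ------+----------+---------+---------+---------
     pub1 | postgres | t       | t       | t
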
+
+/* \dRp+
+ * Describes publications including the contents.
+ *
+ * Takes an optional regexp to select particular publications
+ */
+bool
+describePublications(const char *pattern)
+{
+ PQExpBufferData buf;
+ int i;
+ PGresult *res;
+
+ if (pset.sversion < 100000)
+ {
+ char sverbuf[32];
+ psql_error("The server (version %s) does not support publications.\n",
+ formatPGVersionNumber(pset.sversion, false,
+ sverbuf, sizeof(sverbuf)));
+ return true;
+ }
+
+ initPQExpBuffer(&buf);
+
+ printfPQExpBuffer(&buf,
+ "SELECT oid, pubname, puballtables, pubinsert,\n"
+ " pubupdate, pubdelete\n"
+ "FROM pg_catalog.pg_publication\n");
+
+ processSQLNamePattern(pset.db, &buf, pattern, false, false,
+ NULL, "pubname", NULL,
+ NULL);
+
+ appendPQExpBufferStr(&buf, "ORDER BY 2;");
+
+ res = PSQLexec(buf.data);
+ if (!res)
+ {
+ termPQExpBuffer(&buf);
+ return false;
+ }
+
+ for (i = 0; i < PQntuples(res); i++)
+ {
+ const char align = 'l';
+ int ncols = 3;
+ int nrows = 1;
+ int tables = 0;
+ PGresult *tabres;
+ char *pubid = PQgetvalue(res, i, 0);
+ char *pubname = PQgetvalue(res, i, 1);
+ bool puballtables = strcmp(PQgetvalue(res, i, 2), "t") == 0;
+ int j;
+ PQExpBufferData title;
+ printTableOpt myopt = pset.popt.topt;
+ printTableContent cont;
+
+ initPQExpBuffer(&title);
+ printfPQExpBuffer(&title, _("Publication %s"), pubname);
+ printTableInit(&cont, &myopt, title.data, ncols, nrows);
+
+ printTableAddHeader(&cont, gettext_noop("Inserts"), true, align);
+ printTableAddHeader(&cont, gettext_noop("Updates"), true, align);
+ printTableAddHeader(&cont, gettext_noop("Deletes"), true, align);
+
+ printTableAddCell(&cont, PQgetvalue(res, i, 3), false, false);
+ printTableAddCell(&cont, PQgetvalue(res, i, 4), false, false);
+ printTableAddCell(&cont, PQgetvalue(res, i, 5), false, false);
+
+ if (puballtables)
+ printfPQExpBuffer(&buf,
+ "SELECT n.nspname, c.relname\n"
+ "FROM pg_catalog.pg_class c,\n"
+ " pg_catalog.pg_namespace n\n"
+ "WHERE c.relnamespace = n.oid\n"
+ " AND c.relkind = 'r'\n"
+ " AND n.nspname <> 'pg_catalog'\n"
+ " AND n.nspname <> 'information_schema'\n"
+ "ORDER BY 1,2");
+ else
+ printfPQExpBuffer(&buf,
+ "SELECT n.nspname, c.relname\n"
+ "FROM pg_catalog.pg_class c,\n"
+ " pg_catalog.pg_namespace n,\n"
+ " pg_catalog.pg_publication_rel pr\n"
+ "WHERE c.relnamespace = n.oid\n"
+ " AND c.oid = pr.prrelid\n"
+ " AND pr.prpubid = '%s'\n"
+ "ORDER BY 1,2", pubid);
+
+ tabres = PSQLexec(buf.data);
+ if (!tabres)
+ {
+ printTableCleanup(&cont);
+ PQclear(res);
+ termPQExpBuffer(&buf);
+ termPQExpBuffer(&title);
+ return false;
+ }
+ else
+ tables = PQntuples(tabres);
+
+ if (tables > 0)
+ printTableAddFooter(&cont, _("Tables:"));
+
+ for (j = 0; j < tables; j++)
+ {
+ printfPQExpBuffer(&buf, " \"%s.%s\"",
+ PQgetvalue(tabres, j, 0),
+ PQgetvalue(tabres, j, 1));
+
+ printTableAddFooter(&cont, buf.data);
+ }
+ PQclear(tabres);
+
+ printTable(&cont, pset.queryFout, false, pset.logfile);
+ printTableCleanup(&cont);
+
+ termPQExpBuffer(&title);
+ }
+
+ termPQExpBuffer(&buf);
+ PQclear(res);
+
+ return true;
+}
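
\dRp+ instead renders one small table per publication, with the member tables
(or, for FOR ALL TABLES publications, every user table) listed as a footer;
roughly:

    Publication pub1
     Inserts | Updates | Deletes
    ---------+---------+---------
     t       | t       | t
    Tables:
        "dump_test.test_table"
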
+
+/* \dRs
+ * Describes subscriptions.
+ *
+ * Takes an optional regexp to select particular subscriptions
+ */
+bool
+describeSubscriptions(const char *pattern, bool verbose)
+{
+ PQExpBufferData buf;
+ PGresult *res;
+ printQueryOpt myopt = pset.popt;
+ static const bool translate_columns[] = {false, false, false, false, false};
+
+ if (pset.sversion < 100000)
+ {
+ char sverbuf[32];
+ psql_error("The server (version %s) does not support subscriptions.\n",
+ formatPGVersionNumber(pset.sversion, false,
+ sverbuf, sizeof(sverbuf)));
+ return true;
+ }
+
+ initPQExpBuffer(&buf);
+
+ printfPQExpBuffer(&buf,
+ "SELECT subname AS \"%s\"\n"
+ ", pg_catalog.pg_get_userbyid(subowner) AS \"%s\"\n"
+ ", subenabled AS \"%s\"\n"
+ ", subpublications AS \"%s\"\n",
+ gettext_noop("Name"),
+ gettext_noop("Owner"),
+ gettext_noop("Enabled"),
+ gettext_noop("Publication"));
+
+ if (verbose)
+ {
+ appendPQExpBuffer(&buf,
+ ", subconninfo AS \"%s\"\n",
+ gettext_noop("Conninfo"));
+ }
+
+	/* Only display subscriptions in the current database. */
+ appendPQExpBufferStr(&buf,
+ "FROM pg_catalog.pg_subscription\n"
+ "WHERE subdbid = (SELECT oid\n"
+ " FROM pg_catalog.pg_database\n"
+ " WHERE datname = current_database())");
+
+ processSQLNamePattern(pset.db, &buf, pattern, true, false,
+ NULL, "subname", NULL,
+ NULL);
+
+ appendPQExpBufferStr(&buf, "ORDER BY 1;");
+
+ res = PSQLexec(buf.data);
+ termPQExpBuffer(&buf);
+ if (!res)
+ return false;
+
+ myopt.nullPrint = NULL;
+ myopt.title = _("List of subscriptions");
+ myopt.translate_header = true;
+ myopt.translate_columns = translate_columns;
+ myopt.n_translate_columns = lengthof(translate_columns);
+
+ printQuery(res, &myopt, pset.queryFout, false, pset.logfile);
+
+ PQclear(res);
+ return true;
+}
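
\dRs output is analogous, restricted to the current database; with the
verbose flag a Conninfo column is appended. Illustratively (the publication
list is a name array, hence the braces):

    List of subscriptions
     Name |  Owner   | Enabled | Publication
    ------+----------+---------+-------------
     sub1 | postgres | t       | {pub1}
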
+
/*
* printACLColumn
*
diff --git a/src/bin/psql/describe.h b/src/bin/psql/describe.h
index 4600182e4c..074553e133 100644
--- a/src/bin/psql/describe.h
+++ b/src/bin/psql/describe.h
@@ -102,4 +102,13 @@ extern bool listExtensionContents(const char *pattern);
/* \dy */
extern bool listEventTriggers(const char *pattern, bool verbose);
+/* \dRp */
+extern bool listPublications(const char *pattern);
+
+/* \dRp+ */
+extern bool describePublications(const char *pattern);
+
+/* \dRs */
+extern bool describeSubscriptions(const char *pattern, bool verbose);
+
#endif /* DESCRIBE_H */
diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c
index 09baf871dd..53656294da 100644
--- a/src/bin/psql/help.c
+++ b/src/bin/psql/help.c
@@ -241,6 +241,8 @@ slashUsage(unsigned short int pager)
fprintf(output, _(" \\dO[S+] [PATTERN] list collations\n"));
fprintf(output, _(" \\dp [PATTERN] list table, view, and sequence access privileges\n"));
fprintf(output, _(" \\drds [PATRN1 [PATRN2]] list per-database role settings\n"));
+ fprintf(output, _(" \\dRp[+] [PATTERN] list replication publications\n"));
+ fprintf(output, _(" \\dRs[+] [PATTERN] list replication subscriptions\n"));
fprintf(output, _(" \\ds[S+] [PATTERN] list sequences\n"));
fprintf(output, _(" \\dt[S+] [PATTERN] list tables\n"));
fprintf(output, _(" \\dT[S+] [PATTERN] list data types\n"));
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 7709112f49..d6fffcf42f 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -960,11 +960,13 @@ static const pgsql_thing_t words_after_create[] = {
{"OWNED", NULL, NULL, THING_NO_CREATE}, /* for DROP OWNED BY ... */
{"PARSER", Query_for_list_of_ts_parsers, NULL, THING_NO_SHOW},
{"POLICY", NULL, NULL},
+ {"PUBLICATION", NULL, NULL},
{"ROLE", Query_for_list_of_roles},
{"RULE", "SELECT pg_catalog.quote_ident(rulename) FROM pg_catalog.pg_rules WHERE substring(pg_catalog.quote_ident(rulename),1,%d)='%s'"},
{"SCHEMA", Query_for_list_of_schemas},
{"SEQUENCE", NULL, &Query_for_list_of_sequences},
{"SERVER", Query_for_list_of_servers},
+ {"SUBSCRIPTION", NULL, NULL},
{"TABLE", NULL, &Query_for_list_of_tables},
{"TABLESPACE", Query_for_list_of_tablespaces},
{"TEMP", NULL, NULL, THING_NO_DROP}, /* for CREATE TEMP TABLE ... */
@@ -1407,8 +1409,8 @@ psql_completion(const char *text, int start, int end)
{"AGGREGATE", "COLLATION", "CONVERSION", "DATABASE", "DEFAULT PRIVILEGES", "DOMAIN",
"EVENT TRIGGER", "EXTENSION", "FOREIGN DATA WRAPPER", "FOREIGN TABLE", "FUNCTION",
"GROUP", "INDEX", "LANGUAGE", "LARGE OBJECT", "MATERIALIZED VIEW", "OPERATOR",
- "POLICY", "ROLE", "RULE", "SCHEMA", "SERVER", "SEQUENCE", "SYSTEM", "TABLE",
- "TABLESPACE", "TEXT SEARCH", "TRIGGER", "TYPE",
+ "POLICY", "PUBLICATION", "ROLE", "RULE", "SCHEMA", "SERVER", "SEQUENCE",
+ "SUBSCRIPTION", "SYSTEM", "TABLE", "TABLESPACE", "TEXT SEARCH", "TRIGGER", "TYPE",
"USER", "USER MAPPING FOR", "VIEW", NULL};
COMPLETE_WITH_LIST(list_ALTER);
@@ -1433,7 +1435,26 @@ psql_completion(const char *text, int start, int end)
else
COMPLETE_WITH_FUNCTION_ARG(prev2_wd);
}
-
+	/* ALTER PUBLICATION ... */
+	else if (Matches3("ALTER", "PUBLICATION", MatchAny))
+ {
+ COMPLETE_WITH_LIST5("WITH", "ADD TABLE", "SET TABLE", "DROP TABLE", "OWNER TO");
+ }
+	/* ALTER PUBLICATION ... WITH ( ... */
+	else if (HeadMatches3("ALTER", "PUBLICATION", MatchAny) && TailMatches2("WITH", "("))
+ {
+ COMPLETE_WITH_LIST6("PUBLISH INSERT", "NOPUBLISH INSERT", "PUBLISH UPDATE",
+ "NOPUBLISH UPDATE", "PUBLISH DELETE", "NOPUBLISH DELETE");
+ }
+ /* ALTER SUBSCRIPTION ... */
+ else if (Matches3("ALTER","SUBSCRIPTION",MatchAny))
+ {
+ COMPLETE_WITH_LIST6("WITH", "CONNECTION", "SET PUBLICATION", "ENABLE", "DISABLE", "OWNER TO");
+ }
+ else if (HeadMatches3("ALTER", "SUBSCRIPTION", MatchAny) && TailMatches2("WITH", "("))
+ {
+ COMPLETE_WITH_CONST("SLOT NAME");
+ }
/* ALTER SCHEMA */
else if (Matches3("ALTER", "SCHEMA", MatchAny))
COMPLETE_WITH_LIST2("OWNER TO", "RENAME TO");
@@ -2227,6 +2248,20 @@ psql_completion(const char *text, int start, int end)
COMPLETE_WITH_CONST("(");
+/* CREATE PUBLICATION */
+ else if (Matches3("CREATE", "PUBLICATION", MatchAny))
+ COMPLETE_WITH_LIST3("FOR TABLE", "FOR ALL TABLES", "WITH (");
+ else if (Matches4("CREATE", "PUBLICATION", MatchAny, "FOR"))
+ COMPLETE_WITH_LIST2("TABLE", "ALL TABLES");
+	/* Complete "CREATE PUBLICATION <name> FOR TABLE <table>" */
+ else if (Matches4("CREATE", "PUBLICATION", MatchAny, "FOR TABLE"))
+ COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables, NULL);
+ /* Complete "CREATE PUBLICATION [...] WITH" */
+ else if (HeadMatches2("CREATE", "PUBLICATION") && TailMatches2("WITH", "("))
+ COMPLETE_WITH_LIST2("PUBLISH", "NOPUBLISH");
+ else if (HeadMatches2("CREATE", "PUBLICATION") && TailMatches3("WITH", "(", MatchAny))
+ COMPLETE_WITH_LIST3("INSERT", "UPDATE", "DELETE");
+
/* CREATE RULE */
/* Complete "CREATE RULE " with "AS ON" */
else if (Matches3("CREATE", "RULE", MatchAny))
@@ -2278,6 +2313,16 @@ psql_completion(const char *text, int start, int end)
else if (Matches5("CREATE", "TEXT", "SEARCH", "CONFIGURATION", MatchAny))
COMPLETE_WITH_CONST("(");
+/* CREATE SUBSCRIPTION */
+ else if (Matches3("CREATE", "SUBSCRIPTION", MatchAny))
+ COMPLETE_WITH_CONST("CONNECTION");
+	else if (Matches5("CREATE", "SUBSCRIPTION", MatchAny, "CONNECTION", MatchAny))
+ COMPLETE_WITH_CONST("PUBLICATION");
+ /* Complete "CREATE SUBSCRIPTION ... WITH ( " */
+ else if (HeadMatches2("CREATE", "SUBSCRIPTION") && TailMatches2("WITH", "("))
+ COMPLETE_WITH_LIST5("ENABLED", "DISABLED", "CREATE SLOT",
+ "NOCREATE SLOT", "SLOT NAME");
+
/* CREATE TRIGGER --- is allowed inside CREATE SCHEMA, so use TailMatches */
/* complete CREATE TRIGGER with BEFORE,AFTER,INSTEAD OF */
else if (TailMatches3("CREATE", "TRIGGER", MatchAny))
@@ -2438,7 +2483,7 @@ psql_completion(const char *text, int start, int end)
/* DROP */
/* Complete DROP object with CASCADE / RESTRICT */
else if (Matches3("DROP",
- "COLLATION|CONVERSION|DOMAIN|EXTENSION|LANGUAGE|SCHEMA|SEQUENCE|SERVER|TABLE|TYPE|VIEW",
+ "COLLATION|CONVERSION|DOMAIN|EXTENSION|LANGUAGE|PUBLICATION|SCHEMA|SEQUENCE|SERVER|TABLE|TYPE|VIEW",
MatchAny) ||
Matches4("DROP", "ACCESS", "METHOD", MatchAny) ||
(Matches4("DROP", "AGGREGATE|FUNCTION", MatchAny, MatchAny) &&
diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h
index ee7957418c..10759c7c58 100644
--- a/src/include/catalog/dependency.h
+++ b/src/include/catalog/dependency.h
@@ -161,6 +161,9 @@ typedef enum ObjectClass
OCLASS_EXTENSION, /* pg_extension */
OCLASS_EVENT_TRIGGER, /* pg_event_trigger */
OCLASS_POLICY, /* pg_policy */
+ OCLASS_PUBLICATION, /* pg_publication */
+ OCLASS_PUBLICATION_REL, /* pg_publication_rel */
+ OCLASS_SUBSCRIPTION, /* pg_subscription */
OCLASS_TRANSFORM /* pg_transform */
} ObjectClass;
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 674382f909..45605a0dfd 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -325,6 +325,24 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication
DECLARE_UNIQUE_INDEX(pg_partitioned_table_partrelid_index, 3351, on pg_partitioned_table using btree(partrelid oid_ops));
#define PartitionedRelidIndexId 3351
+DECLARE_UNIQUE_INDEX(pg_publication_oid_index, 6110, on pg_publication using btree(oid oid_ops));
+#define PublicationObjectIndexId 6110
+
+DECLARE_UNIQUE_INDEX(pg_publication_pubname_index, 6111, on pg_publication using btree(pubname name_ops));
+#define PublicationNameIndexId 6111
+
+DECLARE_UNIQUE_INDEX(pg_publication_rel_oid_index, 6112, on pg_publication_rel using btree(oid oid_ops));
+#define PublicationRelObjectIndexId 6112
+
+DECLARE_UNIQUE_INDEX(pg_publication_rel_map_index, 6113, on pg_publication_rel using btree(prrelid oid_ops, prpubid oid_ops));
+#define PublicationRelMapIndexId 6113
+
+DECLARE_UNIQUE_INDEX(pg_subscription_oid_index, 6114, on pg_subscription using btree(oid oid_ops));
+#define SubscriptionObjectIndexId 6114
+
+DECLARE_UNIQUE_INDEX(pg_subscription_subname_index, 6115, on pg_subscription using btree(subdbid oid_ops, subname name_ops));
+#define SubscriptionNameIndexId 6115
+
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 03f55a1cc5..ab12761643 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2776,6 +2776,8 @@ DATA(insert OID = 3099 ( pg_stat_get_wal_senders PGNSP PGUID 12 1 10 0 0 f f f
DESCR("statistics: information about currently active replication");
DATA(insert OID = 3317 ( pg_stat_get_wal_receiver PGNSP PGUID 12 1 0 0 0 f f f f f f s r 0 0 2249 "" "{23,25,3220,23,3220,23,1184,1184,3220,1184,25,25}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{pid,status,receive_start_lsn,receive_start_tli,received_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,conninfo}" _null_ _null_ pg_stat_get_wal_receiver _null_ _null_ _null_ ));
DESCR("statistics: information about WAL receiver");
+DATA(insert OID = 6118 ( pg_stat_get_subscription PGNSP PGUID 12 1 0 0 0 f f f f f f s r 1 0 2249 "26" "{26,26,23,3220,1184,1184,3220,1184}" "{i,o,o,o,o,o,o,o}" "{subid,subid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}" _null_ _null_ pg_stat_get_subscription _null_ _null_ _null_ ));
+DESCR("statistics: information about subscription");
DATA(insert OID = 2026 ( pg_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s r 0 0 23 "" _null_ _null_ _null_ _null_ _null_ pg_backend_pid _null_ _null_ _null_ ));
DESCR("statistics: current backend PID");
DATA(insert OID = 1937 ( pg_stat_get_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 23 "23" _null_ _null_ _null_ _null_ _null_ pg_stat_get_backend_pid _null_ _null_ _null_ ));
@@ -5327,6 +5329,10 @@ DESCR("get an individual replication origin's replication progress");
DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v r 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
DESCR("get progress for all replication origins");
+/* publications */
+DATA(insert OID = 6119 ( pg_get_publication_tables PGNSP PGUID 12 1 1000 0 0 f f t f t t s s 1 0 26 "25" "{25,26}" "{i,o}" "{pubname,relid}" _null_ _null_ pg_get_publication_tables _null_ _null_ _null_ ));
+DESCR("get OIDs of tables in a publication");
+
/* rls */
DATA(insert OID = 3298 ( row_security_active PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 16 "26" _null_ _null_ _null_ _null_ _null_ row_security_active _null_ _null_ _null_ ));
DESCR("row security for current context active on table by table oid");
diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h
new file mode 100644
index 0000000000..f157d0f407
--- /dev/null
+++ b/src/include/catalog/pg_publication.h
@@ -0,0 +1,104 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_publication.h
+ *		definition of the publications catalog (pg_publication)
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_publication.h
+ *
+ * NOTES
+ * the genbki.pl script reads this file and generates .bki
+ * information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PUBLICATION_H
+#define PG_PUBLICATION_H
+
+#include "catalog/genbki.h"
+#include "catalog/objectaddress.h"
+
+/* ----------------
+ * pg_publication definition. cpp turns this into
+ * typedef struct FormData_pg_publication
+ *
+ * ----------------
+ */
+#define PublicationRelationId 6104
+
+CATALOG(pg_publication,6104)
+{
+ NameData pubname; /* name of the publication */
+
+ Oid pubowner; /* publication owner */
+
+ /*
+	 * indicates that this is a special publication which should encompass
+ * all tables in the database (except for the unlogged and temp ones)
+ */
+ bool puballtables;
+
+ /* true if inserts are published */
+ bool pubinsert;
+
+ /* true if updates are published */
+ bool pubupdate;
+
+ /* true if deletes are published */
+ bool pubdelete;
+
+} FormData_pg_publication;
+
+/* ----------------
+ * Form_pg_publication corresponds to a pointer to a tuple with
+ * the format of pg_publication relation.
+ * ----------------
+ */
+typedef FormData_pg_publication *Form_pg_publication;
+
+/* ----------------
+ * compiler constants for pg_publication
+ * ----------------
+ */
+
+#define Natts_pg_publication 6
+#define Anum_pg_publication_pubname 1
+#define Anum_pg_publication_pubowner 2
+#define Anum_pg_publication_puballtables 3
+#define Anum_pg_publication_pubinsert 4
+#define Anum_pg_publication_pubupdate 5
+#define Anum_pg_publication_pubdelete 6
+
+typedef struct PublicationActions
+{
+ bool pubinsert;
+ bool pubupdate;
+ bool pubdelete;
+} PublicationActions;
+
+typedef struct Publication
+{
+ Oid oid;
+ char *name;
+ bool alltables;
+ PublicationActions pubactions;
+} Publication;
+
+extern Publication *GetPublication(Oid pubid);
+extern Publication *GetPublicationByName(const char *pubname, bool missing_ok);
+extern List *GetRelationPublications(Oid relid);
+extern List *GetPublicationRelations(Oid pubid);
+extern List *GetAllTablesPublications(void);
+extern List *GetAllTablesPublicationRelations(void);
+
+extern ObjectAddress publication_add_relation(Oid pubid, Relation targetrel,
+ bool if_not_exists);
+
+extern Oid get_publication_oid(const char *pubname, bool missing_ok);
+extern char *get_publication_name(Oid pubid);
+
+extern Datum pg_get_publication_tables(PG_FUNCTION_ARGS);
+
+#endif /* PG_PUBLICATION_H */
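
A short sketch of the accessor API declared above: list every publication a
given table belongs to (backend context assumed; relid is a placeholder OID,
not part of this header):

    #include "postgres.h"
    #include "catalog/pg_publication.h"

    List       *pubids = GetRelationPublications(relid);
    ListCell   *lc;

    foreach(lc, pubids)
    {
        Publication *pub = GetPublication(lfirst_oid(lc));

        elog(DEBUG1, "relation %u is in publication \"%s\"",
             relid, pub->name);
    }
    list_free(pubids);
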
diff --git a/src/include/catalog/pg_publication_rel.h b/src/include/catalog/pg_publication_rel.h
new file mode 100644
index 0000000000..ac09e29669
--- /dev/null
+++ b/src/include/catalog/pg_publication_rel.h
@@ -0,0 +1,52 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_publication_rel.h
+ * definition of the publication to relation map (pg_publication_rel)
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_publication_rel.h
+ *
+ * NOTES
+ * the genbki.pl script reads this file and generates .bki
+ * information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PUBLICATION_REL_H
+#define PG_PUBLICATION_REL_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ * pg_publication_rel definition. cpp turns this into
+ * typedef struct FormData_pg_publication_rel
+ *
+ * ----------------
+ */
+#define PublicationRelRelationId 6106
+
+CATALOG(pg_publication_rel,6106)
+{
+ Oid prpubid; /* Oid of the publication */
+ Oid prrelid; /* Oid of the relation */
+} FormData_pg_publication_rel;
+
+/* ----------------
+ * Form_pg_publication_rel corresponds to a pointer to a tuple with
+ * the format of pg_publication_rel relation.
+ * ----------------
+ */
+typedef FormData_pg_publication_rel *Form_pg_publication_rel;
+
+/* ----------------
+ * compiler constants for pg_publication_rel
+ * ----------------
+ */
+
+#define Natts_pg_publication_rel 2
+#define Anum_pg_publication_rel_prpubid 1
+#define Anum_pg_publication_rel_prrelid 2
+
+#endif /* PG_PUBLICATION_REL_H */
diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h
new file mode 100644
index 0000000000..0ad7b0e321
--- /dev/null
+++ b/src/include/catalog/pg_subscription.h
@@ -0,0 +1,83 @@
+/* -------------------------------------------------------------------------
+ *
+ * pg_subscription.h
+ * Definition of the subscription catalog (pg_subscription).
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * -------------------------------------------------------------------------
+ */
+#ifndef PG_SUBSCRIPTION_H
+#define PG_SUBSCRIPTION_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ * pg_subscription definition. cpp turns this into
+ * typedef struct FormData_pg_subscription
+ * ----------------
+ */
+#define SubscriptionRelationId 6100
+#define SubscriptionRelation_Rowtype_Id 6101
+
+/*
+ * Technically, the subscriptions live inside the database, so a shared catalog
+ * seems weird, but the replication launcher process needs to access all of
+ * them to be able to start the workers, so we have to put them in a shared,
+ * nailed catalog.
+ */
+CATALOG(pg_subscription,6100) BKI_SHARED_RELATION BKI_ROWTYPE_OID(6101) BKI_SCHEMA_MACRO
+{
+ Oid subdbid; /* Database the subscription is in. */
+ NameData subname; /* Name of the subscription */
+
+ Oid subowner; /* Owner of the subscription */
+
+	bool		subenabled;		/* True if the subscription is enabled
+ * (the worker should be running) */
+
+#ifdef CATALOG_VARLEN /* variable-length fields start here */
+ text subconninfo; /* Connection string to the publisher */
+ NameData subslotname; /* Slot name on publisher */
+
+ text subpublications[1]; /* List of publications subscribed to */
+#endif
+} FormData_pg_subscription;
+
+typedef FormData_pg_subscription *Form_pg_subscription;
+
+/* ----------------
+ * compiler constants for pg_subscription
+ * ----------------
+ */
+#define Natts_pg_subscription 7
+#define Anum_pg_subscription_subdbid 1
+#define Anum_pg_subscription_subname 2
+#define Anum_pg_subscription_subowner 3
+#define Anum_pg_subscription_subenabled 4
+#define Anum_pg_subscription_subconninfo 5
+#define Anum_pg_subscription_subslotname 6
+#define Anum_pg_subscription_subpublications 7
+
+
+typedef struct Subscription
+{
+ Oid oid; /* Oid of the subscription */
+ Oid dbid; /* Oid of the database the subscription is in */
+ char *name; /* Name of the subscription */
+ Oid owner; /* Oid of the subscription owner */
+ bool enabled; /* Indicates if the subscription is enabled */
+ char *conninfo; /* Connection string to the publisher */
+ char *slotname; /* Name of the replication slot */
+ List *publications; /* List of publication names to subscribe to */
+} Subscription;
+
+extern Subscription *GetSubscription(Oid subid, bool missing_ok);
+extern void FreeSubscription(Subscription *sub);
+extern Oid get_subscription_oid(const char *subname, bool missing_ok);
+extern char *get_subscription_name(Oid subid);
+
+extern int CountDBSubscriptions(Oid dbid);
+
+#endif /* PG_SUBSCRIPTION_H */
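A minimal sketch of the lookup API declared above; the function name is
hypothetical and error handling is elided:

    #include "postgres.h"
    #include "catalog/pg_subscription.h"

    /* Sketch: fetch a subscription by OID and report its state. */
    static void
    report_subscription(Oid subid)
    {
        /* missing_ok = false: raise an error if no such subscription exists */
        Subscription *sub = GetSubscription(subid, false);

        elog(DEBUG1, "subscription \"%s\" in database %u is %s",
             sub->name, sub->dbid, sub->enabled ? "enabled" : "disabled");

        FreeSubscription(sub);
    }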
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index 6eb4f95fdd..558a91d06f 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -154,5 +154,6 @@ extern int64 defGetInt64(DefElem *def);
extern List *defGetQualifiedName(DefElem *def);
extern TypeName *defGetTypeName(DefElem *def);
extern int defGetTypeLength(DefElem *def);
+extern List *defGetStringList(DefElem *def);
#endif /* DEFREM_H */
diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h
new file mode 100644
index 0000000000..2307cea070
--- /dev/null
+++ b/src/include/commands/publicationcmds.h
@@ -0,0 +1,28 @@
+/*-------------------------------------------------------------------------
+ *
+ * publicationcmds.h
+ * prototypes for publicationcmds.c.
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/commands/publicationcmds.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PUBLICATIONCMDS_H
+#define PUBLICATIONCMDS_H
+
+#include "nodes/parsenodes.h"
+
+extern ObjectAddress CreatePublication(CreatePublicationStmt *stmt);
+extern void AlterPublication(AlterPublicationStmt *stmt);
+extern void RemovePublicationById(Oid pubid);
+extern void RemovePublicationRelById(Oid proid);
+
+extern ObjectAddress AlterPublicationOwner(const char *name, Oid newOwnerId);
+extern void AlterPublicationOwner_oid(Oid pubid, Oid newOwnerId);
+
+#endif /* PUBLICATIONCMDS_H */
diff --git a/src/include/commands/subscriptioncmds.h b/src/include/commands/subscriptioncmds.h
new file mode 100644
index 0000000000..1d8e2aa412
--- /dev/null
+++ b/src/include/commands/subscriptioncmds.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * subscriptioncmds.h
+ * prototypes for subscriptioncmds.c.
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/commands/subscriptioncmds.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SUBSCRIPTIONCMDS_H
+#define SUBSCRIPTIONCMDS_H
+
+#include "nodes/parsenodes.h"
+
+extern ObjectAddress CreateSubscription(CreateSubscriptionStmt *stmt);
+extern ObjectAddress AlterSubscription(AlterSubscriptionStmt *stmt);
+extern void DropSubscription(DropSubscriptionStmt *stmt);
+
+extern ObjectAddress AlterSubscriptionOwner(const char *name, Oid newOwnerId);
+extern void AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId);
+
+#endif /* SUBSCRIPTIONCMDS_H */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 74aa63536b..02dbe7b228 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -392,5 +392,22 @@ extern void check_exclusion_constraint(Relation heap, Relation index,
Datum *values, bool *isnull,
EState *estate, bool newIndex);
+/*
+ * prototypes from functions in execReplication.c
+ */
+extern bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
+ LockTupleMode lockmode,
+ TupleTableSlot *searchslot,
+ TupleTableSlot *outslot);
+extern bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
+ TupleTableSlot *searchslot, TupleTableSlot *outslot);
+
+extern void ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot);
+extern void ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
+ TupleTableSlot *searchslot, TupleTableSlot *slot);
+extern void ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
+ TupleTableSlot *searchslot);
+extern void CheckCmdReplicaIdentity(Relation rel, CmdType cmd);
+
#endif /* EXECUTOR_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index d65958153d..fa4932a902 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -411,6 +411,11 @@ typedef enum NodeTag
T_CreateTransformStmt,
T_CreateAmStmt,
T_PartitionCmd,
+ T_CreatePublicationStmt,
+ T_AlterPublicationStmt,
+ T_CreateSubscriptionStmt,
+ T_AlterSubscriptionStmt,
+ T_DropSubscriptionStmt,
/*
* TAGS FOR PARSE TREE NODES (parsenodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index edb5cd2152..aad4699f48 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1547,10 +1547,13 @@ typedef enum ObjectType
OBJECT_OPERATOR,
OBJECT_OPFAMILY,
OBJECT_POLICY,
+ OBJECT_PUBLICATION,
+ OBJECT_PUBLICATION_REL,
OBJECT_ROLE,
OBJECT_RULE,
OBJECT_SCHEMA,
OBJECT_SEQUENCE,
+ OBJECT_SUBSCRIPTION,
OBJECT_TABCONSTRAINT,
OBJECT_TABLE,
OBJECT_TABLESPACE,
@@ -3248,4 +3251,52 @@ typedef struct AlterTSConfigurationStmt
bool missing_ok; /* for DROP - skip error if missing? */
} AlterTSConfigurationStmt;
+
+typedef struct CreatePublicationStmt
+{
+ NodeTag type;
+ char *pubname; /* Name of the publication */
+ List *options; /* List of DefElem nodes */
+ List *tables; /* Optional list of tables to add */
+ bool for_all_tables; /* Special publication for all tables in db */
+} CreatePublicationStmt;
+
+typedef struct AlterPublicationStmt
+{
+ NodeTag type;
+ char *pubname; /* Name of the publication */
+
+ /* parameters used for ALTER PUBLICATION ... WITH */
+ List *options; /* List of DefElem nodes */
+
+ /* parameters used for ALTER PUBLICATION ... ADD/DROP TABLE */
+ List *tables; /* List of tables to add/drop */
+ bool for_all_tables; /* Special publication for all tables in db */
+ DefElemAction tableAction; /* What action to perform with the tables */
+} AlterPublicationStmt;
+
+typedef struct CreateSubscriptionStmt
+{
+ NodeTag type;
+ char *subname; /* Name of the subscription */
+ char *conninfo; /* Connection string to publisher */
+ List *publication; /* One or more publications to subscribe to */
+ List *options; /* List of DefElem nodes */
+} CreateSubscriptionStmt;
+
+typedef struct AlterSubscriptionStmt
+{
+ NodeTag type;
+ char *subname; /* Name of the subscription */
+ List *options; /* List of DefElem nodes */
+} AlterSubscriptionStmt;
+
+typedef struct DropSubscriptionStmt
+{
+ NodeTag type;
+ char *subname; /* Name of the subscription */
+ bool drop_slot; /* Should we drop the slot on remote side? */
+ bool missing_ok; /* Skip error if missing? */
+} DropSubscriptionStmt;
+
#endif /* PARSENODES_H */
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 9978573e8b..985d6505ec 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -308,6 +308,7 @@ PG_KEYWORD("privileges", PRIVILEGES, UNRESERVED_KEYWORD)
PG_KEYWORD("procedural", PROCEDURAL, UNRESERVED_KEYWORD)
PG_KEYWORD("procedure", PROCEDURE, UNRESERVED_KEYWORD)
PG_KEYWORD("program", PROGRAM, UNRESERVED_KEYWORD)
+PG_KEYWORD("publication", PUBLICATION, UNRESERVED_KEYWORD)
PG_KEYWORD("quote", QUOTE, UNRESERVED_KEYWORD)
PG_KEYWORD("range", RANGE, UNRESERVED_KEYWORD)
PG_KEYWORD("read", READ, UNRESERVED_KEYWORD)
@@ -360,6 +361,7 @@ PG_KEYWORD("show", SHOW, UNRESERVED_KEYWORD)
PG_KEYWORD("similar", SIMILAR, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("simple", SIMPLE, UNRESERVED_KEYWORD)
PG_KEYWORD("skip", SKIP, UNRESERVED_KEYWORD)
+PG_KEYWORD("slot", SLOT, UNRESERVED_KEYWORD)
PG_KEYWORD("smallint", SMALLINT, COL_NAME_KEYWORD)
PG_KEYWORD("snapshot", SNAPSHOT, UNRESERVED_KEYWORD)
PG_KEYWORD("some", SOME, RESERVED_KEYWORD)
@@ -374,6 +376,7 @@ PG_KEYWORD("stdout", STDOUT, UNRESERVED_KEYWORD)
PG_KEYWORD("storage", STORAGE, UNRESERVED_KEYWORD)
PG_KEYWORD("strict", STRICT_P, UNRESERVED_KEYWORD)
PG_KEYWORD("strip", STRIP_P, UNRESERVED_KEYWORD)
+PG_KEYWORD("subscription", SUBSCRIPTION, UNRESERVED_KEYWORD)
PG_KEYWORD("substring", SUBSTRING, COL_NAME_KEYWORD)
PG_KEYWORD("symmetric", SYMMETRIC, RESERVED_KEYWORD)
PG_KEYWORD("sysid", SYSID, UNRESERVED_KEYWORD)
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5b37894cb5..de8225b989 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -745,7 +745,9 @@ typedef enum
WAIT_EVENT_SYSLOGGER_MAIN,
WAIT_EVENT_WAL_RECEIVER_MAIN,
WAIT_EVENT_WAL_SENDER_MAIN,
- WAIT_EVENT_WAL_WRITER_MAIN
+ WAIT_EVENT_WAL_WRITER_MAIN,
+ WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
+ WAIT_EVENT_LOGICAL_APPLY_MAIN
} WaitEventActivity;
/* ----------
diff --git a/src/include/replication/logicallauncher.h b/src/include/replication/logicallauncher.h
new file mode 100644
index 0000000000..715ac7f24c
--- /dev/null
+++ b/src/include/replication/logicallauncher.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * logicallauncher.h
+ * Exports for logical replication launcher.
+ *
+ * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group
+ *
+ * src/include/replication/logicallauncher.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LOGICALLAUNCHER_H
+#define LOGICALLAUNCHER_H
+
+extern int max_logical_replication_workers;
+
+extern void ApplyLauncherRegister(void);
+extern void ApplyLauncherMain(Datum main_arg);
+
+extern Size ApplyLauncherShmemSize(void);
+extern void ApplyLauncherShmemInit(void);
+
+extern void ApplyLauncherWakeup(void);
+extern void ApplyLauncherWakeupAtCommit(void);
+extern void AtCommit_ApplyLauncher(void);
+
+#endif /* LOGICALLAUNCHER_H */
diff --git a/src/include/replication/logicalproto.h b/src/include/replication/logicalproto.h
new file mode 100644
index 0000000000..0d8153c39d
--- /dev/null
+++ b/src/include/replication/logicalproto.h
@@ -0,0 +1,104 @@
+/*-------------------------------------------------------------------------
+ *
+ * logicalproto.h
+ * logical replication protocol
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/replication/logicalproto.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LOGICAL_PROTO_H
+#define LOGICAL_PROTO_H
+
+#include "replication/reorderbuffer.h"
+#include "utils/rel.h"
+
+/*
+ * Protocol capabilities
+ *
+ * LOGICALREP_PROTO_VERSION_NUM is our native protocol and the greatest
+ * version we can support.  LOGICALREP_PROTO_MIN_VERSION_NUM is the oldest
+ * version we have backwards compatibility for.  The client requests the
+ * protocol version at connect time.
+ */
+#define LOGICALREP_PROTO_MIN_VERSION_NUM 1
+#define LOGICALREP_PROTO_VERSION_NUM 1
+
+/* Tuple coming via logical replication. */
+typedef struct LogicalRepTupleData
+{
+ char *values[MaxTupleAttributeNumber]; /* value in output function (text) format, or NULL if the value is NULL */
+ bool changed[MaxTupleAttributeNumber]; /* marker for changed/unchanged values */
+} LogicalRepTupleData;
+
+typedef uint32 LogicalRepRelId;
+
+/* Relation information */
+typedef struct LogicalRepRelation
+{
+ /* Info coming from the remote side. */
+ LogicalRepRelId remoteid; /* unique id of the relation */
+ char *nspname; /* schema name */
+ char *relname; /* relation name */
+ int natts; /* number of columns */
+ char **attnames; /* column names */
+ Oid *atttyps; /* column types */
+ char replident; /* replica identity */
+ Bitmapset *attkeys; /* Bitmap of key columns */
+} LogicalRepRelation;
+
+/* Type mapping info */
+typedef struct LogicalRepTyp
+{
+ Oid remoteid; /* unique id of the type */
+ char *nspname; /* schema name */
+ char *typname; /* name of the type */
+ Oid typoid; /* local type Oid */
+} LogicalRepTyp;
+
+/* Transaction info */
+typedef struct LogicalRepBeginData
+{
+ XLogRecPtr final_lsn;
+ TimestampTz committime;
+ TransactionId xid;
+} LogicalRepBeginData;
+
+typedef struct LogicalRepCommitData
+{
+ XLogRecPtr commit_lsn;
+ XLogRecPtr end_lsn;
+ TimestampTz committime;
+} LogicalRepCommitData;
+
+extern void logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn);
+extern void logicalrep_read_begin(StringInfo in,
+ LogicalRepBeginData *begin_data);
+extern void logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn,
+ XLogRecPtr commit_lsn);
+extern void logicalrep_read_commit(StringInfo in,
+ LogicalRepCommitData *commit_data);
+extern void logicalrep_write_origin(StringInfo out, const char *origin,
+ XLogRecPtr origin_lsn);
+extern char *logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn);
+extern void logicalrep_write_insert(StringInfo out, Relation rel,
+ HeapTuple newtuple);
+extern LogicalRepRelId logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup);
+extern void logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
+ HeapTuple newtuple);
+extern LogicalRepRelId logicalrep_read_update(StringInfo in,
+ bool *has_oldtuple, LogicalRepTupleData *oldtup,
+ LogicalRepTupleData *newtup);
+extern void logicalrep_write_delete(StringInfo out, Relation rel,
+ HeapTuple oldtuple);
+extern LogicalRepRelId logicalrep_read_delete(StringInfo in,
+ LogicalRepTupleData *oldtup);
+extern void logicalrep_write_rel(StringInfo out, Relation rel);
+extern LogicalRepRelation *logicalrep_read_rel(StringInfo in);
+extern void logicalrep_write_typ(StringInfo out, Oid typoid);
+extern void logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp);
+
+#endif /* LOGICAL_PROTO_H */
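To show how the send and receive halves of this protocol pair up, a hedged
sketch follows; the out/in buffers, rel, and newtuple come from the
surrounding output-plugin and apply-worker machinery (assumed here), and
message-type dispatch between the two sides is omitted:

    /* Sender (output plugin) side: relation metadata, then the new tuple. */
    logicalrep_write_rel(out, rel);
    logicalrep_write_insert(out, rel, newtuple);

    /* Receiver (apply worker) side: the matching reads, one per message. */
    LogicalRepRelation *remoterel = logicalrep_read_rel(in);
    LogicalRepTupleData newtup;
    LogicalRepRelId relid = logicalrep_read_insert(in, &newtup);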
diff --git a/src/include/replication/logicalrelation.h b/src/include/replication/logicalrelation.h
new file mode 100644
index 0000000000..8f9f4a094d
--- /dev/null
+++ b/src/include/replication/logicalrelation.h
@@ -0,0 +1,43 @@
+/*-------------------------------------------------------------------------
+ *
+ * logicalrelation.h
+ * Relation definitions for logical replication relation mapping.
+ *
+ * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group
+ *
+ * src/include/replication/logicalrelation.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LOGICALRELATION_H
+#define LOGICALRELATION_H
+
+#include "replication/logicalproto.h"
+
+typedef struct LogicalRepRelMapEntry
+{
+ LogicalRepRelation remoterel; /* key is remoterel.remoteid */
+
+ /* Mapping to local relation, filled as needed. */
+ Oid localreloid; /* local relation id */
+ Relation localrel; /* relcache entry */
+ AttrNumber *attrmap; /* map of local attributes to
+ * remote ones */
+ bool updatable; /* Can apply updates/deletes? */
+
+ /* Sync state. */
+ char state;
+ XLogRecPtr statelsn;
+} LogicalRepRelMapEntry;
+
+extern void logicalrep_relmap_update(LogicalRepRelation *remoterel);
+
+extern LogicalRepRelMapEntry *logicalrep_rel_open(LogicalRepRelId remoteid,
+ LOCKMODE lockmode);
+extern void logicalrep_rel_close(LogicalRepRelMapEntry *rel,
+ LOCKMODE lockmode);
+
+extern void logicalrep_typmap_update(LogicalRepTyp *remotetyp);
+extern Oid logicalrep_typmap_getid(Oid remoteid);
+
+#endif /* LOGICALRELATION_H */
diff --git a/src/include/replication/logicalworker.h b/src/include/replication/logicalworker.h
new file mode 100644
index 0000000000..93cb25f438
--- /dev/null
+++ b/src/include/replication/logicalworker.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * logicalworker.h
+ * Exports for logical replication workers.
+ *
+ * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group
+ *
+ * src/include/replication/logicalworker.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LOGICALWORKER_H
+#define LOGICALWORKER_H
+
+extern void ApplyWorkerMain(Datum main_arg);
+
+#endif /* LOGICALWORKER_H */
diff --git a/src/include/replication/pgoutput.h b/src/include/replication/pgoutput.h
new file mode 100644
index 0000000000..c20451d1f2
--- /dev/null
+++ b/src/include/replication/pgoutput.h
@@ -0,0 +1,29 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgoutput.h
+ * Logical Replication output plugin
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/include/replication/pgoutput.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGOUTPUT_H
+#define PGOUTPUT_H
+
+
+typedef struct PGOutputData
+{
+ MemoryContext context; /* private memory context for transient
+ * allocations */
+
+ /* client info */
+ uint32 protocol_version;
+
+ List *publication_names;
+ List *publications;
+} PGOutputData;
+
+#endif /* PGOUTPUT_H */
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 6ab2c6f9a5..0857bdc556 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -134,34 +134,64 @@ typedef struct
extern WalRcvData *WalRcv;
+typedef struct
+{
+ bool logical; /* True if this is a logical
+ replication stream, false if a
+ physical stream. */
+ char *slotname; /* Name of the replication slot
+ or NULL. */
+ XLogRecPtr startpoint; /* LSN of starting point. */
+
+ union
+ {
+ struct
+ {
+ TimeLineID startpointTLI; /* Starting timeline */
+ } physical;
+ struct
+ {
+ uint32 proto_version; /* Logical protocol version */
+ List *publication_names; /* String list of publications */
+ } logical;
+ } proto;
+} WalRcvStreamOptions;
+
struct WalReceiverConn;
typedef struct WalReceiverConn WalReceiverConn;
/* libpqwalreceiver hooks */
typedef WalReceiverConn *(*walrcv_connect_fn) (const char *conninfo, bool logical,
- const char *appname);
+ const char *appname,
+ char **err);
+typedef void (*walrcv_check_conninfo_fn) (const char *conninfo);
typedef char *(*walrcv_get_conninfo_fn) (WalReceiverConn *conn);
typedef char *(*walrcv_identify_system_fn) (WalReceiverConn *conn,
- TimeLineID *primary_tli);
+ TimeLineID *primary_tli,
+ int *server_version);
typedef void (*walrcv_readtimelinehistoryfile_fn) (WalReceiverConn *conn,
TimeLineID tli,
char **filename,
char **content, int *size);
typedef bool (*walrcv_startstreaming_fn) (WalReceiverConn *conn,
- TimeLineID tli,
- XLogRecPtr startpoint,
- const char *slotname);
+ const WalRcvStreamOptions *options);
typedef void (*walrcv_endstreaming_fn) (WalReceiverConn *conn,
TimeLineID *next_tli);
typedef int (*walrcv_receive_fn) (WalReceiverConn *conn, char **buffer,
pgsocket *wait_fd);
typedef void (*walrcv_send_fn) (WalReceiverConn *conn, const char *buffer,
int nbytes);
+typedef char *(*walrcv_create_slot_fn) (WalReceiverConn *conn,
+ const char *slotname, bool temporary,
+ XLogRecPtr *lsn);
+typedef bool (*walrcv_command_fn) (WalReceiverConn *conn, const char *cmd,
+ char **err);
typedef void (*walrcv_disconnect_fn) (WalReceiverConn *conn);
typedef struct WalReceiverFunctionsType
{
walrcv_connect_fn walrcv_connect;
+ walrcv_check_conninfo_fn walrcv_check_conninfo;
walrcv_get_conninfo_fn walrcv_get_conninfo;
walrcv_identify_system_fn walrcv_identify_system;
walrcv_readtimelinehistoryfile_fn walrcv_readtimelinehistoryfile;
@@ -169,27 +199,35 @@ typedef struct WalReceiverFunctionsType
walrcv_endstreaming_fn walrcv_endstreaming;
walrcv_receive_fn walrcv_receive;
walrcv_send_fn walrcv_send;
+ walrcv_create_slot_fn walrcv_create_slot;
+ walrcv_command_fn walrcv_command;
walrcv_disconnect_fn walrcv_disconnect;
} WalReceiverFunctionsType;
extern PGDLLIMPORT WalReceiverFunctionsType *WalReceiverFunctions;
-#define walrcv_connect(conninfo, logical, appname) \
- WalReceiverFunctions->walrcv_connect(conninfo, logical, appname)
+#define walrcv_connect(conninfo, logical, appname, err) \
+ WalReceiverFunctions->walrcv_connect(conninfo, logical, appname, err)
+#define walrcv_check_conninfo(conninfo) \
+ WalReceiverFunctions->walrcv_check_conninfo(conninfo)
#define walrcv_get_conninfo(conn) \
WalReceiverFunctions->walrcv_get_conninfo(conn)
-#define walrcv_identify_system(conn, primary_tli) \
- WalReceiverFunctions->walrcv_identify_system(conn, primary_tli)
+#define walrcv_identify_system(conn, primary_tli, server_version) \
+ WalReceiverFunctions->walrcv_identify_system(conn, primary_tli, server_version)
#define walrcv_readtimelinehistoryfile(conn, tli, filename, content, size) \
WalReceiverFunctions->walrcv_readtimelinehistoryfile(conn, tli, filename, content, size)
-#define walrcv_startstreaming(conn, tli, startpoint, slotname) \
- WalReceiverFunctions->walrcv_startstreaming(conn, tli, startpoint, slotname)
+#define walrcv_startstreaming(conn, options) \
+ WalReceiverFunctions->walrcv_startstreaming(conn, options)
#define walrcv_endstreaming(conn, next_tli) \
WalReceiverFunctions->walrcv_endstreaming(conn, next_tli)
#define walrcv_receive(conn, buffer, wait_fd) \
WalReceiverFunctions->walrcv_receive(conn, buffer, wait_fd)
#define walrcv_send(conn, buffer, nbytes) \
WalReceiverFunctions->walrcv_send(conn, buffer, nbytes)
+#define walrcv_create_slot(conn, slotname, temporary, lsn) \
+ WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, lsn)
+#define walrcv_command(conn, cmd, err) \
+ WalReceiverFunctions->walrcv_command(conn, cmd, err)
#define walrcv_disconnect(conn) \
WalReceiverFunctions->walrcv_disconnect(conn)
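A hedged sketch of starting a logical stream through these hooks; the slot
name, resume LSN (origin_startpos), and publication name list are assumed to
come from the caller, and wrconn is the worker's connection from
worker_internal.h:

    WalRcvStreamOptions options;

    options.logical = true;
    options.slotname = "mysub";            /* assumed slot name */
    options.startpoint = origin_startpos;  /* assumed resume LSN */
    options.proto.logical.proto_version = LOGICALREP_PROTO_VERSION_NUM;
    options.proto.logical.publication_names = publications;

    if (!walrcv_startstreaming(wrconn, &options))
        ereport(ERROR,
                (errmsg("could not start WAL streaming")));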
diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h
new file mode 100644
index 0000000000..cecd2b8a1c
--- /dev/null
+++ b/src/include/replication/worker_internal.h
@@ -0,0 +1,62 @@
+/*-------------------------------------------------------------------------
+ *
+ * worker_internal.h
+ * Internal headers shared by logical replication workers.
+ *
+ * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group
+ *
+ * src/include/replication/worker_internal.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef WORKER_INTERNAL_H
+#define WORKER_INTERNAL_H
+
+#include "catalog/pg_subscription.h"
+#include "storage/lock.h"
+
+typedef struct LogicalRepWorker
+{
+ /* Pointer to proc array. NULL if not running. */
+ PGPROC *proc;
+
+ /* Database id to connect to. */
+ Oid dbid;
+
+ /* User to use for connection (will be same as owner of subscription). */
+ Oid userid;
+
+ /* Subscription id for the worker. */
+ Oid subid;
+
+ /* Used for initial table synchronization. */
+ Oid relid;
+
+ /* Stats. */
+ XLogRecPtr last_lsn;
+ TimestampTz last_send_time;
+ TimestampTz last_recv_time;
+ XLogRecPtr reply_lsn;
+ TimestampTz reply_time;
+} LogicalRepWorker;
+
+/* libpqreceiver connection */
+extern struct WalReceiverConn *wrconn;
+
+/* Worker and subscription objects. */
+extern Subscription *MySubscription;
+extern LogicalRepWorker *MyLogicalRepWorker;
+
+extern bool in_remote_transaction;
+extern bool got_SIGTERM;
+
+extern void logicalrep_worker_attach(int slot);
+extern LogicalRepWorker *logicalrep_worker_find(Oid subid);
+extern int logicalrep_worker_count(Oid subid);
+extern void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid);
+extern void logicalrep_worker_stop(Oid subid);
+extern void logicalrep_worker_wakeup(Oid subid);
+
+extern void logicalrep_worker_sigterm(SIGNAL_ARGS);
+
+#endif /* WORKER_INTERNAL_H */
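As a hedged sketch of how launcher-side code might use these declarations
together with pg_subscription.h (the subid variable is assumed), an apply
worker is started for an enabled subscription that has none running yet:

    Subscription *sub = GetSubscription(subid, false);

    if (sub->enabled && logicalrep_worker_find(sub->oid) == NULL)
        logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner);

    FreeSubscription(sub);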
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index b7bab68e21..6a3db9580f 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -23,6 +23,7 @@
* * invalidate a specific tuple in a specific catcache
* * invalidate all catcache entries from a given system catalog
* * invalidate a relcache entry for a specific logical relation
+ * * invalidate all relcache entries
* * invalidate an smgr cache entry for a specific physical relation
* * invalidate the mapped-relation mapping for a given database
* * invalidate any saved snapshot that might be used to scan a given relation
@@ -78,7 +79,7 @@ typedef struct
{
int8 id; /* type field --- must be first */
Oid dbId; /* database ID, or 0 if a shared relation */
- Oid relId; /* relation ID */
+ Oid relId; /* relation ID, or 0 if whole relcache */
} SharedInvalRelcacheMsg;
#define SHAREDINVALSMGR_ID (-3)
diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h
index f397ea1d54..686141b5f9 100644
--- a/src/include/utils/acl.h
+++ b/src/include/utils/acl.h
@@ -199,6 +199,8 @@ typedef enum AclObjectKind
ACL_KIND_FOREIGN_SERVER, /* pg_foreign_server */
ACL_KIND_EVENT_TRIGGER, /* pg_event_trigger */
ACL_KIND_EXTENSION, /* pg_extension */
+ ACL_KIND_PUBLICATION, /* pg_publication */
+ ACL_KIND_SUBSCRIPTION, /* pg_subscription */
MAX_ACL_KIND /* MUST BE LAST */
} AclObjectKind;
@@ -318,6 +320,8 @@ extern bool pg_foreign_data_wrapper_ownercheck(Oid srv_oid, Oid roleid);
extern bool pg_foreign_server_ownercheck(Oid srv_oid, Oid roleid);
extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid);
extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid);
+extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid);
+extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid);
extern bool has_createrole_privilege(Oid roleid);
extern bool has_bypassrls_privilege(Oid roleid);
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index d036e9f1ae..afbe354b4d 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -43,6 +43,8 @@ extern void CacheInvalidateCatalog(Oid catalogId);
extern void CacheInvalidateRelcache(Relation relation);
+extern void CacheInvalidateRelcacheAll(void);
+
extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
extern void CacheInvalidateRelcacheByRelid(Oid relid);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index efef1cedfe..a1750accc2 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -18,6 +18,7 @@
#include "access/xlog.h"
#include "catalog/pg_class.h"
#include "catalog/pg_index.h"
+#include "catalog/pg_publication.h"
#include "fmgr.h"
#include "nodes/bitmapset.h"
#include "rewrite/prs2lock.h"
@@ -132,13 +133,17 @@ typedef struct RelationData
/* data managed by RelationGetIndexList: */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Oid rd_oidindex; /* OID of unique index on OID, if any */
+ Oid rd_pkindex; /* OID of primary key, if any */
Oid rd_replidindex; /* OID of replica identity index, if any */
/* data managed by RelationGetIndexAttrBitmap: */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
+ Bitmapset *rd_pkattr; /* cols included in primary key */
Bitmapset *rd_idattr; /* included in replica identity index */
+ PublicationActions *rd_pubactions; /* publication actions */
+
/*
* rd_options is set whenever rd_rel is loaded into the relcache entry.
* Note that you can NOT look into rd_rel for this data. NULL means "use
@@ -627,5 +632,6 @@ get_partition_col_typmod(PartitionKey key, int col)
extern void RelationIncrementReferenceCount(Relation rel);
extern void RelationDecrementReferenceCount(Relation rel);
extern bool RelationHasUnloggedIndex(Relation rel);
+extern List *RelationGetRepsetList(Relation rel);
#endif /* REL_H */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index a2157b1562..da36b6774f 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -40,6 +40,7 @@ extern void RelationClose(Relation relation);
extern List *RelationGetFKeyList(Relation relation);
extern List *RelationGetIndexList(Relation relation);
extern Oid RelationGetOidIndex(Relation relation);
+extern Oid RelationGetPrimaryKeyIndex(Relation relation);
extern Oid RelationGetReplicaIndex(Relation relation);
extern List *RelationGetIndexExpressions(Relation relation);
extern List *RelationGetIndexPredicate(Relation relation);
@@ -48,6 +49,7 @@ typedef enum IndexAttrBitmapKind
{
INDEX_ATTR_BITMAP_ALL,
INDEX_ATTR_BITMAP_KEY,
+ INDEX_ATTR_BITMAP_PRIMARY_KEY,
INDEX_ATTR_BITMAP_IDENTITY_KEY
} IndexAttrBitmapKind;
@@ -64,6 +66,10 @@ extern void RelationSetIndexList(Relation relation,
extern void RelationInitIndexAccessInfo(Relation relation);
+/* caller must include pg_publication.h */
+struct PublicationActions;
+extern struct PublicationActions *GetRelationPublicationActions(Relation relation);
+
/*
* Routines to support ereport() reports of relation-related errors
*/
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index e4bb62b1e4..66f60d271e 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -80,9 +80,15 @@ enum SysCacheIdentifier
RELOID,
REPLORIGIDENT,
REPLORIGNAME,
+ PUBLICATIONOID,
+ PUBLICATIONNAME,
+ PUBLICATIONREL,
+ PUBLICATIONRELMAP,
RULERELNAME,
SEQRELID,
STATRELATTINH,
+ SUBSCRIPTIONOID,
+ SUBSCRIPTIONNAME,
TABLESPACEOID,
TRFOID,
TRFTYPELANG,
diff --git a/src/test/Makefile b/src/test/Makefile
index 6b40cf50ed..3c2215849e 100644
--- a/src/test/Makefile
+++ b/src/test/Makefile
@@ -12,7 +12,7 @@ subdir = src/test
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
-SUBDIRS = perl regress isolation modules recovery
+SUBDIRS = perl regress isolation modules recovery subscription
# We don't build or execute examples/, locale/, or thread/ by default,
# but we do want "make clean" etc to recurse into them. Likewise for ssl/,
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 932478183a..18d5d12454 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -380,7 +380,9 @@ WAL archiving can be enabled on this node by passing the keyword parameter
has_archiving => 1. This is disabled by default.
postgresql.conf can be set up for replication by passing the keyword
-parameter allows_streaming => 1. This is disabled by default.
+parameter allows_streaming => 'logical' or 'physical' (passing 1 will also
+suffice for physical replication), depending on the type of replication that
+should be enabled. This is disabled by default.
The new node is set up in a fast but unsafe configuration where fsync is
disabled.
@@ -415,7 +417,16 @@ sub init
if ($params{allows_streaming})
{
+ if ($params{allows_streaming} eq "logical")
+ {
+ print $conf "wal_level = logical\n";
+ }
+ else
+ {
+ print $conf "wal_level = replica\n";
+ }
print $conf "max_wal_senders = 5\n";
+ print $conf "max_replication_slots = 5\n";
print $conf "wal_keep_segments = 20\n";
print $conf "max_wal_size = 128MB\n";
print $conf "shared_buffers = 1MB\n";
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out
new file mode 100644
index 0000000000..47b04f1a57
--- /dev/null
+++ b/src/test/regress/expected/publication.out
@@ -0,0 +1,156 @@
+--
+-- PUBLICATION
+--
+CREATE ROLE regress_publication_user LOGIN SUPERUSER;
+SET SESSION AUTHORIZATION 'regress_publication_user';
+CREATE PUBLICATION testpub_default;
+CREATE PUBLICATION testpib_ins_trunct WITH (nopublish delete, nopublish update);
+ALTER PUBLICATION testpub_default WITH (nopublish insert, nopublish delete);
+\dRp
+ List of publications
+ Name | Owner | Inserts | Updates | Deletes
+--------------------+--------------------------+---------+---------+---------
+ testpib_ins_trunct | regress_publication_user | t | f | f
+ testpub_default | regress_publication_user | f | t | f
+(2 rows)
+
+ALTER PUBLICATION testpub_default WITH (publish insert, publish delete);
+\dRp
+ List of publications
+ Name | Owner | Inserts | Updates | Deletes
+--------------------+--------------------------+---------+---------+---------
+ testpib_ins_trunct | regress_publication_user | t | f | f
+ testpub_default | regress_publication_user | t | t | t
+(2 rows)
+
+--- adding tables
+CREATE SCHEMA pub_test;
+CREATE TABLE testpub_tbl1 (id serial primary key, data text);
+CREATE TABLE pub_test.testpub_nopk (foo int, bar int);
+CREATE VIEW testpub_view AS SELECT 1;
+CREATE PUBLICATION testpub_foralltables FOR ALL TABLES WITH (nopublish delete, nopublish update);
+ALTER PUBLICATION testpub_foralltables WITH (publish update);
+CREATE TABLE testpub_tbl2 (id serial primary key, data text);
+-- fail - can't add to for all tables publication
+ALTER PUBLICATION testpub_foralltables ADD TABLE testpub_tbl2;
+ERROR: publication "testpub_foralltables" is defined as FOR ALL TABLES
+DETAIL: Tables cannot be added to or dropped from FOR ALL TABLES publications.
+-- fail - can't drop from all tables publication
+ALTER PUBLICATION testpub_foralltables DROP TABLE testpub_tbl2;
+ERROR: publication "testpub_foralltables" is defined as FOR ALL TABLES
+DETAIL: Tables cannot be added to or dropped from FOR ALL TABLES publications.
+-- fail - can't add to for all tables publication
+ALTER PUBLICATION testpub_foralltables SET TABLE pub_test.testpub_nopk;
+ERROR: publication "testpub_foralltables" is defined as FOR ALL TABLES
+DETAIL: Tables cannot be added to or dropped from FOR ALL TABLES publications.
+SELECT pubname, puballtables FROM pg_publication WHERE pubname = 'testpub_foralltables';
+ pubname | puballtables
+----------------------+--------------
+ testpub_foralltables | t
+(1 row)
+
+\d+ testpub_tbl2
+ Table "public.testpub_tbl2"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+--------+---------+-----------+----------+------------------------------------------+----------+--------------+-------------
+ id | integer | | not null | nextval('testpub_tbl2_id_seq'::regclass) | plain | |
+ data | text | | | | extended | |
+Indexes:
+ "testpub_tbl2_pkey" PRIMARY KEY, btree (id)
+Publications:
+ "testpub_foralltables"
+
+DROP TABLE testpub_tbl2;
+DROP PUBLICATION testpub_foralltables;
+-- fail - view
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_view;
+ERROR: "testpub_view" is not a table
+DETAIL: Only tables can be added to publications.
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1, pub_test.testpub_nopk;
+-- fail - already added
+ALTER PUBLICATION testpub_fortbl ADD TABLE testpub_tbl1;
+ERROR: relation "testpub_tbl1" is already member of publication "testpub_fortbl"
+-- fail - already added
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1;
+ERROR: publication "testpub_fortbl" already exists
+\dRp+ testpub_fortbl
+ Publication testpub_fortbl
+ Inserts | Updates | Deletes
+---------+---------+---------
+ t | t | t
+Tables:
+ "pub_test.testpub_nopk"
+ "public.testpub_tbl1"
+
+-- fail - view
+ALTER PUBLICATION testpub_default ADD TABLE testpub_view;
+ERROR: "testpub_view" is not a table
+DETAIL: Only tables can be added to publications.
+ALTER PUBLICATION testpub_default ADD TABLE testpub_tbl1;
+ALTER PUBLICATION testpub_default SET TABLE testpub_tbl1;
+ALTER PUBLICATION testpub_default ADD TABLE pub_test.testpub_nopk;
+ALTER PUBLICATION testpib_ins_trunct ADD TABLE pub_test.testpub_nopk, testpub_tbl1;
+\d+ pub_test.testpub_nopk
+ Table "pub_test.testpub_nopk"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+--------+---------+-----------+----------+---------+---------+--------------+-------------
+ foo | integer | | | | plain | |
+ bar | integer | | | | plain | |
+Publications:
+ "testpib_ins_trunct"
+ "testpub_default"
+ "testpub_fortbl"
+
+\d+ testpub_tbl1
+ Table "public.testpub_tbl1"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+--------+---------+-----------+----------+------------------------------------------+----------+--------------+-------------
+ id | integer | | not null | nextval('testpub_tbl1_id_seq'::regclass) | plain | |
+ data | text | | | | extended | |
+Indexes:
+ "testpub_tbl1_pkey" PRIMARY KEY, btree (id)
+Publications:
+ "testpib_ins_trunct"
+ "testpub_default"
+ "testpub_fortbl"
+
+\dRp+ testpub_default
+ Publication testpub_default
+ Inserts | Updates | Deletes
+---------+---------+---------
+ t | t | t
+Tables:
+ "pub_test.testpub_nopk"
+ "public.testpub_tbl1"
+
+ALTER PUBLICATION testpub_default DROP TABLE testpub_tbl1, pub_test.testpub_nopk;
+-- fail - nonexistent
+ALTER PUBLICATION testpub_default DROP TABLE pub_test.testpub_nopk;
+ERROR: relation "testpub_nopk" is not part of the publication
+\d+ testpub_tbl1
+ Table "public.testpub_tbl1"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+--------+---------+-----------+----------+------------------------------------------+----------+--------------+-------------
+ id | integer | | not null | nextval('testpub_tbl1_id_seq'::regclass) | plain | |
+ data | text | | | | extended | |
+Indexes:
+ "testpub_tbl1_pkey" PRIMARY KEY, btree (id)
+Publications:
+ "testpib_ins_trunct"
+ "testpub_fortbl"
+
+DROP VIEW testpub_view;
+DROP TABLE testpub_tbl1;
+\dRp+ testpub_default
+ Publication testpub_default
+ Inserts | Updates | Deletes
+---------+---------+---------
+ t | t | t
+(1 row)
+
+DROP PUBLICATION testpub_default;
+DROP PUBLICATION testpib_ins_trunct;
+DROP SCHEMA pub_test CASCADE;
+NOTICE: drop cascades to table pub_test.testpub_nopk
+RESET SESSION AUTHORIZATION;
+DROP ROLE regress_publication_user;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index add6adc871..60abcad101 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1417,6 +1417,14 @@ pg_prepared_xacts| SELECT p.transaction,
FROM ((pg_prepared_xact() p(transaction, gid, prepared, ownerid, dbid)
LEFT JOIN pg_authid u ON ((p.ownerid = u.oid)))
LEFT JOIN pg_database d ON ((p.dbid = d.oid)));
+pg_publication_tables| SELECT p.pubname,
+ n.nspname AS schemaname,
+ c.relname AS tablename
+ FROM pg_publication p,
+ (pg_class c
+ JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
+ WHERE (c.oid IN ( SELECT pg_get_publication_tables.relid
+ FROM pg_get_publication_tables((p.pubname)::text) pg_get_publication_tables(relid)));
pg_replication_origin_status| SELECT pg_show_replication_origin_status.local_id,
pg_show_replication_origin_status.external_id,
pg_show_replication_origin_status.remote_lsn,
@@ -1822,6 +1830,16 @@ pg_stat_ssl| SELECT s.pid,
s.sslcompression AS compression,
s.sslclientdn AS clientdn
FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, ssl, sslversion, sslcipher, sslbits, sslcompression, sslclientdn);
+pg_stat_subscription| SELECT su.oid AS subid,
+ su.subname,
+ st.pid,
+ st.received_lsn,
+ st.last_msg_send_time,
+ st.last_msg_receipt_time,
+ st.latest_end_lsn,
+ st.latest_end_time
+ FROM (pg_subscription su
+ LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid)));
pg_stat_sys_indexes| SELECT pg_stat_all_indexes.relid,
pg_stat_all_indexes.indexrelid,
pg_stat_all_indexes.schemaname,
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index 7ad68c745b..0af013f8a2 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -124,6 +124,8 @@ pg_partitioned_table|t
pg_pltemplate|t
pg_policy|t
pg_proc|t
+pg_publication|t
+pg_publication_rel|t
pg_range|t
pg_replication_origin|t
pg_rewrite|t
@@ -133,6 +135,7 @@ pg_shdepend|t
pg_shdescription|t
pg_shseclabel|t
pg_statistic|t
+pg_subscription|t
pg_tablespace|t
pg_transform|t
pg_trigger|t
diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out
new file mode 100644
index 0000000000..2ccec98b15
--- /dev/null
+++ b/src/test/regress/expected/subscription.out
@@ -0,0 +1,66 @@
+--
+-- SUBSCRIPTION
+--
+CREATE ROLE regress_subscription_user LOGIN SUPERUSER;
+SET SESSION AUTHORIZATION 'regress_subscription_user';
+-- fail - no publications
+CREATE SUBSCRIPTION testsub CONNECTION 'foo';
+ERROR: syntax error at or near ";"
+LINE 1: CREATE SUBSCRIPTION testsub CONNECTION 'foo';
+ ^
+-- fail - no connection
+CREATE SUBSCRIPTION testsub PUBLICATION foo;
+ERROR: syntax error at or near "PUBLICATION"
+LINE 1: CREATE SUBSCRIPTION testsub PUBLICATION foo;
+ ^
+set client_min_messages to error;
+CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub;
+ERROR: invalid connection string syntax: missing "=" after "testconn" in connection info string
+
+CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT);
+reset client_min_messages;
+\dRs+
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Conninfo
+---------+---------------------------+---------+-------------+---------------------
+ testsub | regress_subscription_user | f | {testpub} | dbname=doesnotexist
+(1 row)
+
+ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3;
+\dRs
+ List of subscriptions
+ Name | Owner | Enabled | Publication
+---------+---------------------------+---------+---------------------
+ testsub | regress_subscription_user | f | {testpub2,testpub3}
+(1 row)
+
+ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2';
+ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1;
+\dRs+
+ List of subscriptions
+ Name | Owner | Enabled | Publication | Conninfo
+---------+---------------------------+---------+--------------------+----------------------
+ testsub | regress_subscription_user | f | {testpub,testpub1} | dbname=doesnotexist2
+(1 row)
+
+BEGIN;
+ALTER SUBSCRIPTION testsub ENABLE;
+\dRs
+ List of subscriptions
+ Name | Owner | Enabled | Publication
+---------+---------------------------+---------+--------------------
+ testsub | regress_subscription_user | t | {testpub,testpub1}
+(1 row)
+
+ALTER SUBSCRIPTION testsub DISABLE;
+\dRs
+ List of subscriptions
+ Name | Owner | Enabled | Publication
+---------+---------------------------+---------+--------------------
+ testsub | regress_subscription_user | f | {testpub,testpub1}
+(1 row)
+
+COMMIT;
+DROP SUBSCRIPTION testsub NODROP SLOT;
+RESET SESSION AUTHORIZATION;
+DROP ROLE regress_subscription_user;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 8641769351..e9b2bad6fd 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -97,6 +97,9 @@ test: rules psql_crosstab amutils
# run by itself so it can run parallel workers
test: select_parallel
+# no relation-related tests can be put in this group
+test: publication subscription
+
# ----------
# Another group of parallel tests
# ----------
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 835cf3556c..7cdc0f6a69 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -127,6 +127,8 @@ test: tsrf
test: rules
test: psql_crosstab
test: select_parallel
+test: publication
+test: subscription
test: amutils
test: select_views
test: portals_p2
diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql
new file mode 100644
index 0000000000..89a31672fa
--- /dev/null
+++ b/src/test/regress/sql/publication.sql
@@ -0,0 +1,82 @@
+--
+-- PUBLICATION
+--
+CREATE ROLE regress_publication_user LOGIN SUPERUSER;
+SET SESSION AUTHORIZATION 'regress_publication_user';
+
+CREATE PUBLICATION testpub_default;
+
+CREATE PUBLICATION testpib_ins_trunct WITH (nopublish delete, nopublish update);
+
+ALTER PUBLICATION testpub_default WITH (nopublish insert, nopublish delete);
+
+\dRp
+
+ALTER PUBLICATION testpub_default WITH (publish insert, publish delete);
+
+\dRp
+
+--- adding tables
+CREATE SCHEMA pub_test;
+CREATE TABLE testpub_tbl1 (id serial primary key, data text);
+CREATE TABLE pub_test.testpub_nopk (foo int, bar int);
+CREATE VIEW testpub_view AS SELECT 1;
+
+CREATE PUBLICATION testpub_foralltables FOR ALL TABLES WITH (nopublish delete, nopublish update);
+ALTER PUBLICATION testpub_foralltables WITH (publish update);
+
+CREATE TABLE testpub_tbl2 (id serial primary key, data text);
+-- fail - can't add to for all tables publication
+ALTER PUBLICATION testpub_foralltables ADD TABLE testpub_tbl2;
+-- fail - can't drop from all tables publication
+ALTER PUBLICATION testpub_foralltables DROP TABLE testpub_tbl2;
+-- fail - can't add to for all tables publication
+ALTER PUBLICATION testpub_foralltables SET TABLE pub_test.testpub_nopk;
+
+SELECT pubname, puballtables FROM pg_publication WHERE pubname = 'testpub_foralltables';
+\d+ testpub_tbl2
+
+DROP TABLE testpub_tbl2;
+DROP PUBLICATION testpub_foralltables;
+
+-- fail - view
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_view;
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1, pub_test.testpub_nopk;
+-- fail - already added
+ALTER PUBLICATION testpub_fortbl ADD TABLE testpub_tbl1;
+-- fail - already added
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1;
+
+\dRp+ testpub_fortbl
+
+-- fail - view
+ALTER PUBLICATION testpub_default ADD TABLE testpub_view;
+
+ALTER PUBLICATION testpub_default ADD TABLE testpub_tbl1;
+ALTER PUBLICATION testpub_default SET TABLE testpub_tbl1;
+ALTER PUBLICATION testpub_default ADD TABLE pub_test.testpub_nopk;
+
+ALTER PUBLICATION testpib_ins_trunct ADD TABLE pub_test.testpub_nopk, testpub_tbl1;
+
+\d+ pub_test.testpub_nopk
+\d+ testpub_tbl1
+\dRp+ testpub_default
+
+ALTER PUBLICATION testpub_default DROP TABLE testpub_tbl1, pub_test.testpub_nopk;
+-- fail - nonexistent
+ALTER PUBLICATION testpub_default DROP TABLE pub_test.testpub_nopk;
+
+\d+ testpub_tbl1
+
+DROP VIEW testpub_view;
+DROP TABLE testpub_tbl1;
+
+\dRp+ testpub_default
+
+DROP PUBLICATION testpub_default;
+DROP PUBLICATION testpib_ins_trunct;
+
+DROP SCHEMA pub_test CASCADE;
+
+RESET SESSION AUTHORIZATION;
+DROP ROLE regress_publication_user;
diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql
new file mode 100644
index 0000000000..68c17d5cfd
--- /dev/null
+++ b/src/test/regress/sql/subscription.sql
@@ -0,0 +1,44 @@
+--
+-- SUBSCRIPTION
+--
+
+CREATE ROLE regress_subscription_user LOGIN SUPERUSER;
+SET SESSION AUTHORIZATION 'regress_subscription_user';
+
+-- fail - no publications
+CREATE SUBSCRIPTION testsub CONNECTION 'foo';
+
+-- fail - no connection
+CREATE SUBSCRIPTION testsub PUBLICATION foo;
+
+set client_min_messages to error;
+CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub;
+CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT);
+reset client_min_messages;
+
+\dRs+
+
+ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3;
+
+\dRs
+
+ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2';
+ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1;
+
+\dRs+
+
+BEGIN;
+ALTER SUBSCRIPTION testsub ENABLE;
+
+\dRs
+
+ALTER SUBSCRIPTION testsub DISABLE;
+
+\dRs
+
+COMMIT;
+
+DROP SUBSCRIPTION testsub NODROP SLOT;
+
+RESET SESSION AUTHORIZATION;
+DROP ROLE regress_subscription_user;
diff --git a/src/test/subscription/.gitignore b/src/test/subscription/.gitignore
new file mode 100644
index 0000000000..871e943d50
--- /dev/null
+++ b/src/test/subscription/.gitignore
@@ -0,0 +1,2 @@
+# Generated by test suite
+/tmp_check/
diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile
new file mode 100644
index 0000000000..bb9795453a
--- /dev/null
+++ b/src/test/subscription/Makefile
@@ -0,0 +1,22 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/subscription
+#
+# Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/test/subscription/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/test/subscription
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+EXTRA_INSTALL = contrib/hstore
+
+check:
+ $(prove_check)
+
+clean distclean maintainer-clean:
+ rm -rf tmp_check
diff --git a/src/test/subscription/README b/src/test/subscription/README
new file mode 100644
index 0000000000..e9e93755b7
--- /dev/null
+++ b/src/test/subscription/README
@@ -0,0 +1,16 @@
+src/test/subscription/README
+
+Regression tests for subscription/logical replication
+=====================================================
+
+This directory contains a test suite for subscription/logical replication.
+
+Running the tests
+=================
+
+ make check
+
+NOTE: This creates a temporary installation, and some tests may
+create one or more nodes for the purposes of the test.
+
+NOTE: This requires the --enable-tap-tests argument to configure.
diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl
new file mode 100644
index 0000000000..b51740bcd4
--- /dev/null
+++ b/src/test/subscription/t/001_rep_changes.pl
@@ -0,0 +1,188 @@
+# Basic logical replication test
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 11;
+
+# Initialize publisher node
+my $node_publisher = get_new_node('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+# Create subscriber node
+my $node_subscriber = get_new_node('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+# Create some preexisting content on publisher
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE tab_notrep AS SELECT generate_series(1,10) AS a");
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE tab_ins (a int)");
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE tab_full AS SELECT generate_series(1,10) AS a");
+$node_publisher->safe_psql('postgres',
+ "CREATE TABLE tab_rep (a int primary key)");
+
+# Setup structure on subscriber
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE tab_notrep (a int)");
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE tab_ins (a int)");
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE tab_full (a int)");
+$node_subscriber->safe_psql('postgres',
+ "CREATE TABLE tab_rep (a int primary key)");
+
+# Setup logical replication
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+$node_publisher->safe_psql('postgres',
+ "CREATE PUBLICATION tap_pub");
+$node_publisher->safe_psql('postgres',
+ "CREATE PUBLICATION tap_pub_ins_only WITH (nopublish delete, nopublish update)");
+$node_publisher->safe_psql('postgres',
+ "ALTER PUBLICATION tap_pub ADD TABLE tab_rep, tab_full");
+$node_publisher->safe_psql('postgres',
+ "ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_ins");
+
+my $appname = 'tap_sub';
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub, tap_pub_ins_only");
+
+# Wait for subscriber to finish initialization
+my $caughtup_query =
+"SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$appname';";
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+my $result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_notrep");
+is($result, qq(0), 'check non-replicated table is empty on subscriber');
+
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_ins SELECT generate_series(1,50)");
+$node_publisher->safe_psql('postgres',
+ "DELETE FROM tab_ins WHERE a > 20");
+$node_publisher->safe_psql('postgres',
+ "UPDATE tab_ins SET a = -a");
+
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_rep SELECT generate_series(1,50)");
+$node_publisher->safe_psql('postgres',
+ "DELETE FROM tab_rep WHERE a > 20");
+$node_publisher->safe_psql('postgres',
+ "UPDATE tab_rep SET a = -a");
+
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins");
+is($result, qq(50|1|50), 'check replicated inserts on subscriber');
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep");
+is($result, qq(20|-20|-1), 'check replicated changes on subscriber');
+
+# insert some duplicate rows
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_full SELECT generate_series(1,10)");
+
+# add REPLICA IDENTITY FULL so we can update
+$node_publisher->safe_psql('postgres',
+ "ALTER TABLE tab_full REPLICA IDENTITY FULL");
+$node_subscriber->safe_psql('postgres',
+ "ALTER TABLE tab_full REPLICA IDENTITY FULL");
+$node_publisher->safe_psql('postgres',
+ "ALTER TABLE tab_ins REPLICA IDENTITY FULL");
+$node_subscriber->safe_psql('postgres',
+ "ALTER TABLE tab_ins REPLICA IDENTITY FULL");
+
+# and do the update
+$node_publisher->safe_psql('postgres',
+ "UPDATE tab_full SET a = a * a");
+
+# Wait for subscription to catch up
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full");
+is($result, qq(10|1|100), 'update works with REPLICA IDENTITY FULL and duplicate tuples');
+
+# Check that a change of connection string and/or publication list causes a
+# restart of the subscription workers.  Not all of these are registered as
+# tests, since we need to poll for the change, but the test suite will
+# nonetheless fail when something goes wrong.
+my $oldpid = $node_publisher->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_replication WHERE application_name = '$appname';");
+$node_subscriber->safe_psql('postgres',
+ "ALTER SUBSCRIPTION tap_sub CONNECTION 'application_name=$appname $publisher_connstr'");
+$node_publisher->poll_query_until('postgres',
+ "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = '$appname';")
+ or die "Timed out while waiting for apply to restart";
+
+$oldpid = $node_publisher->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_replication WHERE application_name = '$appname';");
+$node_subscriber->safe_psql('postgres',
+ "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only");
+$node_publisher->poll_query_until('postgres',
+ "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = '$appname';")
+ or die "Timed out while waiting for apply to restart";
+
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_ins SELECT generate_series(1001,1100)");
+$node_publisher->safe_psql('postgres',
+ "DELETE FROM tab_rep");
+
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins");
+is($result, qq(150|1|1100), 'check replicated inserts after subscription publication change');
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep");
+is($result, qq(20|-20|-1), 'check changes skipped after subscription publication change');
+
+# check alter publication (relcache invalidation etc)
+$node_publisher->safe_psql('postgres',
+ "ALTER PUBLICATION tap_pub_ins_only WITH (publish delete)");
+$node_publisher->safe_psql('postgres',
+ "ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_full");
+$node_publisher->safe_psql('postgres',
+ "DELETE FROM tab_ins WHERE a > 0");
+$node_publisher->safe_psql('postgres',
+ "INSERT INTO tab_full VALUES(0)");
+
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+# note that the data differ between publisher and subscriber
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins");
+is($result, qq(50|1|50), 'check replicated deletes after alter publication');
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full");
+is($result, qq(11|0|100), 'check replicated insert after alter publication');
+
+# check all the cleanup
+$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub");
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription");
+is($result, qq(0), 'check subscription was dropped on subscriber');
+
+$result =
+ $node_publisher->safe_psql('postgres', "SELECT count(*) FROM pg_replication_slots");
+is($result, qq(0), 'check replication slot was dropped on publisher');
+
+$result =
+ $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_replication_origin");
+is($result, qq(0), 'check replication origin was dropped on subscriber');
+
+$node_subscriber->stop('fast');
+$node_publisher->stop('fast');
diff --git a/src/test/subscription/t/002_types.pl b/src/test/subscription/t/002_types.pl
new file mode 100644
index 0000000000..9064eb4c6d
--- /dev/null
+++ b/src/test/subscription/t/002_types.pl
@@ -0,0 +1,539 @@
+# This tests that more complex datatypes are replicated correctly
+# by logical replication
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 3;
+
+# Initialize publisher node
+my $node_publisher = get_new_node('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+# Create subscriber node
+my $node_subscriber = get_new_node('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+# Create some preexisting content on publisher
+my $ddl = qq(
+ CREATE EXTENSION hstore WITH SCHEMA public;
+ CREATE TABLE public.tst_one_array (
+ a INTEGER PRIMARY KEY,
+ b INTEGER[]
+ );
+ CREATE TABLE public.tst_arrays (
+ a INTEGER[] PRIMARY KEY,
+ b TEXT[],
+ c FLOAT[],
+ d INTERVAL[]
+ );
+
+ CREATE TYPE public.tst_enum_t AS ENUM ('a', 'b', 'c', 'd', 'e');
+ CREATE TABLE public.tst_one_enum (
+ a INTEGER PRIMARY KEY,
+ b public.tst_enum_t
+ );
+ CREATE TABLE public.tst_enums (
+ a public.tst_enum_t PRIMARY KEY,
+ b public.tst_enum_t[]
+ );
+
+ CREATE TYPE public.tst_comp_basic_t AS (a FLOAT, b TEXT, c INTEGER);
+ CREATE TYPE public.tst_comp_enum_t AS (a FLOAT, b public.tst_enum_t, c INTEGER);
+ CREATE TYPE public.tst_comp_enum_array_t AS (a FLOAT, b public.tst_enum_t[], c INTEGER);
+ CREATE TABLE public.tst_one_comp (
+ a INTEGER PRIMARY KEY,
+ b public.tst_comp_basic_t
+ );
+ CREATE TABLE public.tst_comps (
+ a public.tst_comp_basic_t PRIMARY KEY,
+ b public.tst_comp_basic_t[]
+ );
+ CREATE TABLE public.tst_comp_enum (
+ a INTEGER PRIMARY KEY,
+ b public.tst_comp_enum_t
+ );
+ CREATE TABLE public.tst_comp_enum_array (
+ a public.tst_comp_enum_t PRIMARY KEY,
+ b public.tst_comp_enum_t[]
+ );
+ CREATE TABLE public.tst_comp_one_enum_array (
+ a INTEGER PRIMARY KEY,
+ b public.tst_comp_enum_array_t
+ );
+ CREATE TABLE public.tst_comp_enum_what (
+ a public.tst_comp_enum_array_t PRIMARY KEY,
+ b public.tst_comp_enum_array_t[]
+ );
+
+ CREATE TYPE public.tst_comp_mix_t AS (
+ a public.tst_comp_basic_t,
+ b public.tst_comp_basic_t[],
+ c public.tst_enum_t,
+ d public.tst_enum_t[]
+ );
+ CREATE TABLE public.tst_comp_mix_array (
+ a public.tst_comp_mix_t PRIMARY KEY,
+ b public.tst_comp_mix_t[]
+ );
+ CREATE TABLE public.tst_range (
+ a INTEGER PRIMARY KEY,
+ b int4range
+ );
+ CREATE TABLE public.tst_range_array (
+ a INTEGER PRIMARY KEY,
+ b TSTZRANGE,
+ c int8range[]
+ );
+ CREATE TABLE public.tst_hstore (
+ a INTEGER PRIMARY KEY,
+ b public.hstore
+ ););
+
+# Setup structure on both nodes
+$node_publisher->safe_psql('postgres', $ddl);
+$node_subscriber->safe_psql('postgres', $ddl);
+
+# Setup logical replication
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+$node_publisher->safe_psql('postgres',
+ "CREATE PUBLICATION tap_pub FOR ALL TABLES");
+
+my $appname = 'tap_sub';
+$node_subscriber->safe_psql('postgres',
+ "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (SLOT NAME = tap_sub_slot)");
+
+# Wait for subscriber to finish initialization
+my $caughtup_query =
+"SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$appname';";
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+# Insert initial test data
+$node_publisher->safe_psql('postgres', qq(
+ -- test_tbl_one_array_col
+ INSERT INTO tst_one_array (a, b) VALUES
+ (1, '{1, 2, 3}'),
+ (2, '{2, 3, 1}'),
+ (3, '{3, 2, 1}'),
+ (4, '{4, 3, 2}'),
+ (5, '{5, NULL, 3}');
+
+ -- test_tbl_arrays
+ INSERT INTO tst_arrays (a, b, c, d) VALUES
+ ('{1, 2, 3}', '{"a", "b", "c"}', '{1.1, 2.2, 3.3}', '{"1 day", "2 days", "3 days"}'),
+ ('{2, 3, 1}', '{"b", "c", "a"}', '{2.2, 3.3, 1.1}', '{"2 minutes", "3 minutes", "1 minute"}'),
+ ('{3, 1, 2}', '{"c", "a", "b"}', '{3.3, 1.1, 2.2}', '{"3 years", "1 year", "2 years"}'),
+ ('{4, 1, 2}', '{"d", "a", "b"}', '{4.4, 1.1, 2.2}', '{"4 years", "1 year", "2 years"}'),
+ ('{5, NULL, NULL}', '{"e", NULL, "b"}', '{5.5, 1.1, NULL}', '{"5 years", NULL, NULL}');
+
+ -- test_tbl_single_enum
+ INSERT INTO tst_one_enum (a, b) VALUES
+ (1, 'a'),
+ (2, 'b'),
+ (3, 'c'),
+ (4, 'd'),
+ (5, NULL);
+
+ -- test_tbl_enums
+ INSERT INTO tst_enums (a, b) VALUES
+ ('a', '{b, c}'),
+ ('b', '{c, a}'),
+ ('c', '{b, a}'),
+ ('d', '{c, b}'),
+ ('e', '{d, NULL}');
+
+ -- test_tbl_single_composites
+ INSERT INTO tst_one_comp (a, b) VALUES
+ (1, ROW(1.0, 'a', 1)),
+ (2, ROW(2.0, 'b', 2)),
+ (3, ROW(3.0, 'c', 3)),
+ (4, ROW(4.0, 'd', 4)),
+ (5, ROW(NULL, NULL, 5));
+
+ -- test_tbl_composites
+ INSERT INTO tst_comps (a, b) VALUES
+ (ROW(1.0, 'a', 1), ARRAY[ROW(1, 'a', 1)::tst_comp_basic_t]),
+ (ROW(2.0, 'b', 2), ARRAY[ROW(2, 'b', 2)::tst_comp_basic_t]),
+ (ROW(3.0, 'c', 3), ARRAY[ROW(3, 'c', 3)::tst_comp_basic_t]),
+ (ROW(4.0, 'd', 4), ARRAY[ROW(4, 'd', 3)::tst_comp_basic_t]),
+ (ROW(5.0, 'e', NULL), ARRAY[NULL, ROW(5, NULL, 5)::tst_comp_basic_t]);
+
+ -- test_tbl_composite_with_enums
+ INSERT INTO tst_comp_enum (a, b) VALUES
+ (1, ROW(1.0, 'a', 1)),
+ (2, ROW(2.0, 'b', 2)),
+ (3, ROW(3.0, 'c', 3)),
+ (4, ROW(4.0, 'd', 4)),
+ (5, ROW(NULL, 'e', NULL));
+
+ -- test_tbl_composite_with_enums_array
+ INSERT INTO tst_comp_enum_array (a, b) VALUES
+ (ROW(1.0, 'a', 1), ARRAY[ROW(1, 'a', 1)::tst_comp_enum_t]),
+ (ROW(2.0, 'b', 2), ARRAY[ROW(2, 'b', 2)::tst_comp_enum_t]),
+ (ROW(3.0, 'c', 3), ARRAY[ROW(3, 'c', 3)::tst_comp_enum_t]),
+ (ROW(4.0, 'd', 3), ARRAY[ROW(3, 'd', 3)::tst_comp_enum_t]),
+ (ROW(5.0, 'e', 3), ARRAY[ROW(3, 'e', 3)::tst_comp_enum_t, NULL]);
+
+ -- test_tbl_composite_with_single_enums_array_in_composite
+ INSERT INTO tst_comp_one_enum_array (a, b) VALUES
+ (1, ROW(1.0, '{a, b, c}', 1)),
+ (2, ROW(2.0, '{a, b, c}', 2)),
+ (3, ROW(3.0, '{a, b, c}', 3)),
+ (4, ROW(4.0, '{c, b, d}', 4)),
+ (5, ROW(5.0, '{NULL, e, NULL}', 5));
+
+ -- test_tbl_composite_with_enums_array_in_composite
+ INSERT INTO tst_comp_enum_what (a, b) VALUES
+ (ROW(1.0, '{a, b, c}', 1), ARRAY[ROW(1, '{a, b, c}', 1)::tst_comp_enum_array_t]),
+ (ROW(2.0, '{b, c, a}', 2), ARRAY[ROW(2, '{b, c, a}', 1)::tst_comp_enum_array_t]),
+ (ROW(3.0, '{c, a, b}', 1), ARRAY[ROW(3, '{c, a, b}', 1)::tst_comp_enum_array_t]),
+ (ROW(4.0, '{c, b, d}', 4), ARRAY[ROW(4, '{c, b, d}', 4)::tst_comp_enum_array_t]),
+ (ROW(5.0, '{c, NULL, b}', NULL), ARRAY[ROW(5, '{c, e, b}', 1)::tst_comp_enum_array_t]);
+
+ -- test_tbl_mixed_composites
+ INSERT INTO tst_comp_mix_array (a, b) VALUES
+ (ROW(
+ ROW(1,'a',1),
+ ARRAY[ROW(1,'a',1)::tst_comp_basic_t, ROW(2,'b',2)::tst_comp_basic_t],
+ 'a',
+ '{a,b,NULL,c}'),
+ ARRAY[
+ ROW(
+ ROW(1,'a',1),
+ ARRAY[
+ ROW(1,'a',1)::tst_comp_basic_t,
+ ROW(2,'b',2)::tst_comp_basic_t,
+ NULL
+ ],
+ 'a',
+ '{a,b,c}'
+ )::tst_comp_mix_t
+ ]
+ );
+
+ -- test_tbl_range
+ INSERT INTO tst_range (a, b) VALUES
+ (1, '[1, 10]'),
+ (2, '[2, 20]'),
+ (3, '[3, 30]'),
+ (4, '[4, 40]'),
+ (5, '[5, 50]');
+
+ -- test_tbl_range_array
+ INSERT INTO tst_range_array (a, b, c) VALUES
+ (1, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), '{"[1,2]", "[10,20]"}'),
+ (2, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz - interval '2 days', 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[2,3]", "[20,30]"}'),
+ (3, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz - interval '3 days', 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[3,4]"}'),
+ (4, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz - interval '4 days', 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[4,5]", NULL, "[40,50]"}'),
+ (5, NULL, NULL);
+
+ -- tst_hstore
+ INSERT INTO tst_hstore (a, b) VALUES
+ (1, '"a"=>"1"'),
+ (2, '"zzz"=>"foo"'),
+ (3, '"123"=>"321"'),
+ (4, '"yellow horse"=>"moaned"');
+));
+
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+# Check the data on subscriber
+my $result = $node_subscriber->safe_psql('postgres', qq(
+ SET timezone = '+2';
+ SELECT a, b FROM tst_one_array ORDER BY a;
+ SELECT a, b, c, d FROM tst_arrays ORDER BY a;
+ SELECT a, b FROM tst_one_enum ORDER BY a;
+ SELECT a, b FROM tst_enums ORDER BY a;
+ SELECT a, b FROM tst_one_comp ORDER BY a;
+ SELECT a, b FROM tst_comps ORDER BY a;
+ SELECT a, b FROM tst_comp_enum ORDER BY a;
+ SELECT a, b FROM tst_comp_enum_array ORDER BY a;
+ SELECT a, b FROM tst_comp_one_enum_array ORDER BY a;
+ SELECT a, b FROM tst_comp_enum_what ORDER BY a;
+ SELECT a, b FROM tst_comp_mix_array ORDER BY a;
+ SELECT a, b FROM tst_range ORDER BY a;
+ SELECT a, b, c FROM tst_range_array ORDER BY a;
+ SELECT a, b FROM tst_hstore ORDER BY a;
+));
+
+is($result, '1|{1,2,3}
+2|{2,3,1}
+3|{3,2,1}
+4|{4,3,2}
+5|{5,NULL,3}
+{1,2,3}|{a,b,c}|{1.1,2.2,3.3}|{"1 day","2 days","3 days"}
+{2,3,1}|{b,c,a}|{2.2,3.3,1.1}|{00:02:00,00:03:00,00:01:00}
+{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"}
+{4,1,2}|{d,a,b}|{4.4,1.1,2.2}|{"4 years","1 year","2 years"}
+{5,NULL,NULL}|{e,NULL,b}|{5.5,1.1,NULL}|{"5 years",NULL,NULL}
+1|a
+2|b
+3|c
+4|d
+5|
+a|{b,c}
+b|{c,a}
+c|{b,a}
+d|{c,b}
+e|{d,NULL}
+1|(1,a,1)
+2|(2,b,2)
+3|(3,c,3)
+4|(4,d,4)
+5|(,,5)
+(1,a,1)|{"(1,a,1)"}
+(2,b,2)|{"(2,b,2)"}
+(3,c,3)|{"(3,c,3)"}
+(4,d,4)|{"(4,d,3)"}
+(5,e,)|{NULL,"(5,,5)"}
+1|(1,a,1)
+2|(2,b,2)
+3|(3,c,3)
+4|(4,d,4)
+5|(,e,)
+(1,a,1)|{"(1,a,1)"}
+(2,b,2)|{"(2,b,2)"}
+(3,c,3)|{"(3,c,3)"}
+(4,d,3)|{"(3,d,3)"}
+(5,e,3)|{"(3,e,3)",NULL}
+1|(1,"{a,b,c}",1)
+2|(2,"{a,b,c}",2)
+3|(3,"{a,b,c}",3)
+4|(4,"{c,b,d}",4)
+5|(5,"{NULL,e,NULL}",5)
+(1,"{a,b,c}",1)|{"(1,\"{a,b,c}\",1)"}
+(2,"{b,c,a}",2)|{"(2,\"{b,c,a}\",1)"}
+(3,"{c,a,b}",1)|{"(3,\"{c,a,b}\",1)"}
+(4,"{c,b,d}",4)|{"(4,\"{c,b,d}\",4)"}
+(5,"{c,NULL,b}",)|{"(5,\"{c,e,b}\",1)"}
+("(1,a,1)","{""(1,a,1)"",""(2,b,2)""}",a,"{a,b,NULL,c}")|{"(\"(1,a,1)\",\"{\"\"(1,a,1)\"\",\"\"(2,b,2)\"\",NULL}\",a,\"{a,b,c}\")"}
+1|[1,11)
+2|[2,21)
+3|[3,31)
+4|[4,41)
+5|[5,51)
+1|["2014-08-04 00:00:00+02",infinity)|{"[1,3)","[10,21)"}
+2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"}
+3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"}
+4|["2014-07-31 00:00:00+02","2014-08-04 00:00:00+02")|{"[4,6)",NULL,"[40,51)"}
+5||
+1|"a"=>"1"
+2|"zzz"=>"foo"
+3|"123"=>"321"
+4|"yellow horse"=>"moaned"',
+'check replicated inserts on subscriber');
+
+# Run batch of updates
+$node_publisher->safe_psql('postgres', qq(
+ UPDATE tst_one_array SET b = '{4, 5, 6}' WHERE a = 1;
+ UPDATE tst_one_array SET b = '{4, 5, 6, 1}' WHERE a > 3;
+ UPDATE tst_arrays SET b = '{"1a", "2b", "3c"}', c = '{1.0, 2.0, 3.0}', d = '{"1 day 1 second", "2 days 2 seconds", "3 days 3 second"}' WHERE a = '{1, 2, 3}';
+ UPDATE tst_arrays SET b = '{"c", "d", "e"}', c = '{3.0, 4.0, 5.0}', d = '{"3 day 1 second", "4 days 2 seconds", "5 days 3 second"}' WHERE a[1] > 3;
+ UPDATE tst_one_enum SET b = 'c' WHERE a = 1;
+ UPDATE tst_one_enum SET b = NULL WHERE a > 3;
+ UPDATE tst_enums SET b = '{e, NULL}' WHERE a = 'a';
+ UPDATE tst_enums SET b = '{e, d}' WHERE a > 'c';
+ UPDATE tst_one_comp SET b = ROW(1.0, 'A', 1) WHERE a = 1;
+ UPDATE tst_one_comp SET b = ROW(NULL, 'x', -1) WHERE a > 3;
+ UPDATE tst_comps SET b = ARRAY[ROW(9, 'x', -1)::tst_comp_basic_t] WHERE (a).a = 1.0;
+ UPDATE tst_comps SET b = ARRAY[NULL, ROW(9, 'x', NULL)::tst_comp_basic_t] WHERE (a).a > 3.9;
+ UPDATE tst_comp_enum SET b = ROW(1.0, NULL, NULL) WHERE a = 1;
+ UPDATE tst_comp_enum SET b = ROW(4.0, 'd', 44) WHERE a > 3;
+ UPDATE tst_comp_enum_array SET b = ARRAY[NULL, ROW(3, 'd', 3)::tst_comp_enum_t] WHERE a = ROW(1.0, 'a', 1)::tst_comp_enum_t;
+ UPDATE tst_comp_enum_array SET b = ARRAY[ROW(1, 'a', 1)::tst_comp_enum_t, ROW(2, 'b', 2)::tst_comp_enum_t] WHERE (a).a > 3;
+ UPDATE tst_comp_one_enum_array SET b = ROW(1.0, '{a, e, c}', NULL) WHERE a = 1;
+ UPDATE tst_comp_one_enum_array SET b = ROW(4.0, '{c, b, d}', 4) WHERE a > 3;
+ UPDATE tst_comp_enum_what SET b = ARRAY[NULL, ROW(1, '{a, b, c}', 1)::tst_comp_enum_array_t, ROW(NULL, '{a, e, c}', 2)::tst_comp_enum_array_t] WHERE (a).a = 1;
+ UPDATE tst_comp_enum_what SET b = ARRAY[ROW(5, '{a, b, c}', 5)::tst_comp_enum_array_t] WHERE (a).a > 3;
+ UPDATE tst_comp_mix_array SET b[2] = NULL WHERE ((a).a).a = 1;
+ UPDATE tst_range SET b = '[100, 1000]' WHERE a = 1;
+ UPDATE tst_range SET b = '(1, 90)' WHERE a > 3;
+ UPDATE tst_range_array SET c = '{"[100, 1000]"}' WHERE a = 1;
+ UPDATE tst_range_array SET b = tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), c = '{NULL, "[11,9999999]"}' WHERE a > 3;
+ UPDATE tst_hstore SET b = '"updated"=>"value"' WHERE a < 3;
+ UPDATE tst_hstore SET b = '"also"=>"updated"' WHERE a = 3;
+));
+
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+# Check the data on subscriber
+$result = $node_subscriber->safe_psql('postgres', qq(
+ SET timezone = '+2';
+ SELECT a, b FROM tst_one_array ORDER BY a;
+ SELECT a, b, c, d FROM tst_arrays ORDER BY a;
+ SELECT a, b FROM tst_one_enum ORDER BY a;
+ SELECT a, b FROM tst_enums ORDER BY a;
+ SELECT a, b FROM tst_one_comp ORDER BY a;
+ SELECT a, b FROM tst_comps ORDER BY a;
+ SELECT a, b FROM tst_comp_enum ORDER BY a;
+ SELECT a, b FROM tst_comp_enum_array ORDER BY a;
+ SELECT a, b FROM tst_comp_one_enum_array ORDER BY a;
+ SELECT a, b FROM tst_comp_enum_what ORDER BY a;
+ SELECT a, b FROM tst_comp_mix_array ORDER BY a;
+ SELECT a, b FROM tst_range ORDER BY a;
+ SELECT a, b, c FROM tst_range_array ORDER BY a;
+ SELECT a, b FROM tst_hstore ORDER BY a;
+));
+
+is($result, '1|{4,5,6}
+2|{2,3,1}
+3|{3,2,1}
+4|{4,5,6,1}
+5|{4,5,6,1}
+{1,2,3}|{1a,2b,3c}|{1,2,3}|{"1 day 00:00:01","2 days 00:00:02","3 days 00:00:03"}
+{2,3,1}|{b,c,a}|{2.2,3.3,1.1}|{00:02:00,00:03:00,00:01:00}
+{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"}
+{4,1,2}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"}
+{5,NULL,NULL}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"}
+1|c
+2|b
+3|c
+4|
+5|
+a|{e,NULL}
+b|{c,a}
+c|{b,a}
+d|{e,d}
+e|{e,d}
+1|(1,A,1)
+2|(2,b,2)
+3|(3,c,3)
+4|(,x,-1)
+5|(,x,-1)
+(1,a,1)|{"(9,x,-1)"}
+(2,b,2)|{"(2,b,2)"}
+(3,c,3)|{"(3,c,3)"}
+(4,d,4)|{NULL,"(9,x,)"}
+(5,e,)|{NULL,"(9,x,)"}
+1|(1,,)
+2|(2,b,2)
+3|(3,c,3)
+4|(4,d,44)
+5|(4,d,44)
+(1,a,1)|{NULL,"(3,d,3)"}
+(2,b,2)|{"(2,b,2)"}
+(3,c,3)|{"(3,c,3)"}
+(4,d,3)|{"(1,a,1)","(2,b,2)"}
+(5,e,3)|{"(1,a,1)","(2,b,2)"}
+1|(1,"{a,e,c}",)
+2|(2,"{a,b,c}",2)
+3|(3,"{a,b,c}",3)
+4|(4,"{c,b,d}",4)
+5|(4,"{c,b,d}",4)
+(1,"{a,b,c}",1)|{NULL,"(1,\"{a,b,c}\",1)","(,\"{a,e,c}\",2)"}
+(2,"{b,c,a}",2)|{"(2,\"{b,c,a}\",1)"}
+(3,"{c,a,b}",1)|{"(3,\"{c,a,b}\",1)"}
+(4,"{c,b,d}",4)|{"(5,\"{a,b,c}\",5)"}
+(5,"{c,NULL,b}",)|{"(5,\"{a,b,c}\",5)"}
+("(1,a,1)","{""(1,a,1)"",""(2,b,2)""}",a,"{a,b,NULL,c}")|{"(\"(1,a,1)\",\"{\"\"(1,a,1)\"\",\"\"(2,b,2)\"\",NULL}\",a,\"{a,b,c}\")",NULL}
+1|[100,1001)
+2|[2,21)
+3|[3,31)
+4|[2,90)
+5|[2,90)
+1|["2014-08-04 00:00:00+02",infinity)|{"[100,1001)"}
+2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"}
+3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"}
+4|["2014-08-04 00:00:00+02",infinity)|{NULL,"[11,10000000)"}
+5|["2014-08-04 00:00:00+02",infinity)|{NULL,"[11,10000000)"}
+1|"updated"=>"value"
+2|"updated"=>"value"
+3|"also"=>"updated"
+4|"yellow horse"=>"moaned"',
+'check replicated updates on subscriber');
+
+# Run batch of deletes
+$node_publisher->safe_psql('postgres', qq(
+ DELETE FROM tst_one_array WHERE a = 1;
+ DELETE FROM tst_one_array WHERE b = '{2, 3, 1}';
+ DELETE FROM tst_arrays WHERE a = '{1, 2, 3}';
+ DELETE FROM tst_arrays WHERE a[1] = 2;
+ DELETE FROM tst_one_enum WHERE a = 1;
+ DELETE FROM tst_one_enum WHERE b = 'b';
+ DELETE FROM tst_enums WHERE a = 'a';
+ DELETE FROM tst_enums WHERE b[1] = 'b';
+ DELETE FROM tst_one_comp WHERE a = 1;
+ DELETE FROM tst_one_comp WHERE (b).a = 2.0;
+ DELETE FROM tst_comps WHERE (a).b = 'a';
+ DELETE FROM tst_comps WHERE ROW(3, 'c', 3)::tst_comp_basic_t = ANY(b);
+ DELETE FROM tst_comp_enum WHERE a = 1;
+ DELETE FROM tst_comp_enum WHERE (b).a = 2.0;
+ DELETE FROM tst_comp_enum_array WHERE a = ROW(1.0, 'a', 1)::tst_comp_enum_t;
+ DELETE FROM tst_comp_enum_array WHERE ROW(3, 'c', 3)::tst_comp_enum_t = ANY(b);
+ DELETE FROM tst_comp_one_enum_array WHERE a = 1;
+ DELETE FROM tst_comp_one_enum_array WHERE 'a' = ANY((b).b);
+ DELETE FROM tst_comp_enum_what WHERE (a).a = 1;
+ DELETE FROM tst_comp_enum_what WHERE (b[1]).b = '{c, a, b}';
+ DELETE FROM tst_comp_mix_array WHERE ((a).a).a = 1;
+ DELETE FROM tst_range WHERE a = 1;
+ DELETE FROM tst_range WHERE '[10,20]' && b;
+ DELETE FROM tst_range_array WHERE a = 1;
+ DELETE FROM tst_range_array WHERE tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 05 00:00:00 2014 CEST'::timestamptz) && b;
+ DELETE FROM tst_hstore WHERE a = 1;
+));
+
+$node_publisher->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for subscriber to catch up";
+
+# Check the data on subscriber
+$result = $node_subscriber->safe_psql('postgres', qq(
+ SET timezone = '+2';
+ SELECT a, b FROM tst_one_array ORDER BY a;
+ SELECT a, b, c, d FROM tst_arrays ORDER BY a;
+ SELECT a, b FROM tst_one_enum ORDER BY a;
+ SELECT a, b FROM tst_enums ORDER BY a;
+ SELECT a, b FROM tst_one_comp ORDER BY a;
+ SELECT a, b FROM tst_comps ORDER BY a;
+ SELECT a, b FROM tst_comp_enum ORDER BY a;
+ SELECT a, b FROM tst_comp_enum_array ORDER BY a;
+ SELECT a, b FROM tst_comp_one_enum_array ORDER BY a;
+ SELECT a, b FROM tst_comp_enum_what ORDER BY a;
+ SELECT a, b FROM tst_comp_mix_array ORDER BY a;
+ SELECT a, b FROM tst_range ORDER BY a;
+ SELECT a, b, c FROM tst_range_array ORDER BY a;
+ SELECT a, b FROM tst_hstore ORDER BY a;
+));
+
+is($result, '3|{3,2,1}
+4|{4,5,6,1}
+5|{4,5,6,1}
+{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"}
+{4,1,2}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"}
+{5,NULL,NULL}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"}
+3|c
+4|
+5|
+b|{c,a}
+d|{e,d}
+e|{e,d}
+3|(3,c,3)
+4|(,x,-1)
+5|(,x,-1)
+(2,b,2)|{"(2,b,2)"}
+(4,d,4)|{NULL,"(9,x,)"}
+(5,e,)|{NULL,"(9,x,)"}
+3|(3,c,3)
+4|(4,d,44)
+5|(4,d,44)
+(2,b,2)|{"(2,b,2)"}
+(4,d,3)|{"(1,a,1)","(2,b,2)"}
+(5,e,3)|{"(1,a,1)","(2,b,2)"}
+4|(4,"{c,b,d}",4)
+5|(4,"{c,b,d}",4)
+(2,"{b,c,a}",2)|{"(2,\"{b,c,a}\",1)"}
+(4,"{c,b,d}",4)|{"(5,\"{a,b,c}\",5)"}
+(5,"{c,NULL,b}",)|{"(5,\"{a,b,c}\",5)"}
+2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"}
+3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"}
+2|"updated"=>"value"
+3|"also"=>"updated"
+4|"yellow horse"=>"moaned"',
+'check replicated deletes on subscriber');
+
+$node_subscriber->stop('fast');
+$node_publisher->stop('fast');
--
cgit v1.2.3
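The TAP tests above drive logical replication entirely through SQL. As a
minimal sketch of the same flow (object names and the connection string are
placeholders, not taken from the tests):

    -- On the publisher: a table and a publication covering it.
    CREATE TABLE tab_demo (a int PRIMARY KEY);
    CREATE PUBLICATION demo_pub FOR TABLE tab_demo;

    -- On the subscriber: the table must already exist with a matching
    -- definition; the subscription creates a slot on the publisher and
    -- starts an apply worker.
    CREATE TABLE tab_demo (a int PRIMARY KEY);
    CREATE SUBSCRIPTION demo_sub
        CONNECTION 'host=publisher.example dbname=postgres'
        PUBLICATION demo_pub;

    -- Back on the publisher: committed changes now flow to the subscriber.
    INSERT INTO tab_demo SELECT generate_series(1, 10);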
From 954737095061e5b5f1d87fb8cc43f7f8afff64c6 Mon Sep 17 00:00:00 2001
From: Fujii Masao
Date: Sat, 21 Jan 2017 00:55:36 +0900
Subject: Add description of temporary column into pg_replication_slots doc.
Ayumi Ishii
---
doc/src/sgml/catalogs.sgml | 9 +++++++++
1 file changed, 9 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 7c758a5081..524180e011 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9315,6 +9315,15 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
null. Only logical slots have an associated database.
+
+ temporary
+ boolean
+
+ True if this is a temporary replication slot. Temporary slots are
+ not saved to disk and are automatically dropped on error or when
+ the session has finished.
+
+
activeboolean
--
cgit v1.2.3
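The new column is easy to observe; a minimal sketch, assuming a server built
from this branch (the slot name is arbitrary, and the three-argument form of
pg_create_physical_replication_slot is the one that grew a temporary flag
earlier in this cycle):

    SELECT pg_create_physical_replication_slot('demo_slot', false, true);
    SELECT slot_name, slot_type, temporary FROM pg_replication_slots;
    -- expected: demo_slot | physical | t
    -- the slot is dropped automatically when this session ends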
From cdc2a70470bdbe3663dc464deb753d6d931bba61 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 20 Jan 2017 11:10:02 -0500
Subject: Allow backslash line continuations in pgbench's meta commands.
A pgbench meta command can now be continued onto additional line(s) of a
script file by writing backslash-return. The continuation marker is
equivalent to white space in that it separates tokens.
Eventually it'd be nice to have the same thing in psql, but that will
be a much larger project.
Fabien Coelho, reviewed by Rafia Sabih
Discussion: https://postgr.es/m/alpine.DEB.2.20.1610031049310.19411@lancre
---
doc/src/sgml/ref/pgbench.sgml | 6 ++++--
src/bin/pgbench/exprscan.l | 21 +++++++++++++++++++--
2 files changed, 23 insertions(+), 4 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml
index 3fb29f8c1d..1eee8dc574 100644
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -809,7 +809,8 @@ pgbench options> dbname>
Script file meta commands begin with a backslash (\>) and
- extend to the end of the line.
+ normally extend to the end of the line, although they can be continued
+ to additional lines by writing backslash-return.
Arguments to a meta command are separated by white space.
These meta commands are supported:
@@ -838,7 +839,8 @@ pgbench options> dbname>
Examples:
\set ntellers 10 * :scale
-\set aid (1021 * random(1, 100000 * :scale)) % (100000 * :scale) + 1
+\set aid (1021 * random(1, 100000 * :scale)) % \
+ (100000 * :scale) + 1
diff --git a/src/bin/pgbench/exprscan.l b/src/bin/pgbench/exprscan.l
index 9a3be3d667..dc1367bbdb 100644
--- a/src/bin/pgbench/exprscan.l
+++ b/src/bin/pgbench/exprscan.l
@@ -66,6 +66,9 @@ space [ \t\r\f\v]
nonspace [^ \t\r\f\v\n]
newline [\n]
+/* Line continuation marker */
+continuation \\{newline}
+
/* Exclusive states */
%x EXPR
@@ -96,8 +99,20 @@ newline [\n]
return 1;
}
+ /*
+ * We need this rule to avoid returning "word\" instead of recognizing
+ * a continuation marker just after a word:
+ */
+{nonspace}+{continuation} {
+ /* Found "word\\\n", emit and return just "word" */
+ psqlscan_emit(cur_state, yytext, yyleng - 2);
+ return 1;
+ }
+
{space}+ { /* ignore */ }
+{continuation} { /* ignore */ }
+
{newline} {
/* report end of command */
last_was_newline = true;
@@ -138,14 +153,16 @@ newline [\n]
return FUNCTION;
}
+{space}+ { /* ignore */ }
+
+{continuation} { /* ignore */ }
+
{newline} {
/* report end of command */
last_was_newline = true;
return 0;
}
-{space}+ { /* ignore */ }
-
. {
/*
* must strdup yytext so that expr_yyerror_more doesn't
--
cgit v1.2.3
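In practice the continuation marker keeps long \set expressions readable; a
sketch of a custom pgbench script (the expression is illustrative, modeled on
the documentation example above):

    \set aid (1021 * random(1, 100000 * :scale)) % \
             (100000 * :scale) + 1
    UPDATE pgbench_accounts SET abalance = abalance + 1 WHERE aid = :aid;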
From 0bc1207aeb3de951bf95a9e9899b1256216d65f5 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 23 Jan 2017 14:00:58 -0500
Subject: Fix default minimum value for descending sequences
For some reason that is lost in history, a descending sequence would
default its minimum value to -2^63+1 (-PG_INT64_MAX) instead of
-2^63 (PG_INT64_MIN), even though explicitly specifying a minimum value
of -2^63 would work. Fix this inconsistency by using the full range by
default.
Reported-by: Daniel Verite
Reviewed-by: Michael Paquier
---
doc/src/sgml/ref/create_sequence.sgml | 2 +-
src/backend/commands/sequence.c | 4 ++--
src/bin/pg_dump/pg_dump.c | 4 ++--
src/include/pg_config_manual.h | 6 ------
4 files changed, 5 insertions(+), 11 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/create_sequence.sgml b/doc/src/sgml/ref/create_sequence.sgml
index 62ae379226..86ff018c4b 100644
--- a/doc/src/sgml/ref/create_sequence.sgml
+++ b/doc/src/sgml/ref/create_sequence.sgml
@@ -133,7 +133,7 @@ SELECT * FROM name;
the minimum value a sequence can generate. If this clause is not
supplied or
NO MINVALUE
is specified, then
defaults will be used. The defaults are 1 and
- -263>-1 for ascending and descending sequences,
+ -263> for ascending and descending sequences,
respectively.
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 36f1249ee5..0c673f5763 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -1353,7 +1353,7 @@ init_params(ParseState *pstate, List *options, bool isInit,
else if (isInit || max_value != NULL)
{
if (seqform->seqincrement > 0)
- seqform->seqmax = SEQ_MAXVALUE; /* ascending seq */
+ seqform->seqmax = PG_INT64_MAX; /* ascending seq */
else
seqform->seqmax = -1; /* descending seq */
seqdataform->log_cnt = 0;
@@ -1370,7 +1370,7 @@ init_params(ParseState *pstate, List *options, bool isInit,
if (seqform->seqincrement > 0)
seqform->seqmin = 1; /* ascending seq */
else
- seqform->seqmin = SEQ_MINVALUE; /* descending seq */
+ seqform->seqmin = PG_INT64_MIN; /* descending seq */
seqdataform->log_cnt = 0;
}
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 049c9cdfd7..e3cca62bf7 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -15876,8 +15876,8 @@ dumpSequence(Archive *fout, TableInfo *tbinfo)
/* Make sure we are in proper schema */
selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
- snprintf(bufm, sizeof(bufm), INT64_FORMAT, SEQ_MINVALUE);
- snprintf(bufx, sizeof(bufx), INT64_FORMAT, SEQ_MAXVALUE);
+ snprintf(bufm, sizeof(bufm), INT64_FORMAT, PG_INT64_MIN);
+ snprintf(bufx, sizeof(bufx), INT64_FORMAT, PG_INT64_MAX);
if (fout->remoteVersion >= 100000)
{
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index c07907145a..f3b35297d1 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -50,12 +50,6 @@
*/
#define PARTITION_MAX_KEYS 32
-/*
- * Set the upper and lower bounds of sequence values.
- */
-#define SEQ_MAXVALUE PG_INT64_MAX
-#define SEQ_MINVALUE (-SEQ_MAXVALUE)
-
/*
* When we don't have native spinlocks, we use semaphores to simulate them.
* Decreasing this value reduces consumption of OS resources; increasing it
--
cgit v1.2.3
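The effect is easiest to see on a fresh descending sequence; a sketch,
assuming the pg_sequence catalog added earlier in this release cycle:

    CREATE SEQUENCE downseq INCREMENT BY -1;
    SELECT seqmin, seqmax FROM pg_sequence
    WHERE seqrelid = 'downseq'::regclass;
    -- seqmin is now -9223372036854775808 (PG_INT64_MIN) rather than
    -- -9223372036854775807; seqmax defaults to -1 for descending sequences.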
From 7b4ac19982a77a1a2a6f096c4a11ee7325a14d2c Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 24 Jan 2017 16:42:58 -0500
Subject: Extend index AM API for parallel index scans.
This patch doesn't actually make any index AM parallel-aware, but it
provides the necessary functions at the AM layer to do so.
Rahila Syed, Amit Kapila, Robert Haas
---
contrib/bloom/blutils.c | 3 +
doc/src/sgml/indexam.sgml | 67 +++++++++++++++++
src/backend/access/brin/brin.c | 3 +
src/backend/access/gin/ginutil.c | 3 +
src/backend/access/gist/gist.c | 3 +
src/backend/access/hash/hash.c | 3 +
src/backend/access/index/indexam.c | 135 +++++++++++++++++++++++++++++++++--
src/backend/access/nbtree/nbtree.c | 3 +
src/backend/access/spgist/spgutils.c | 3 +
src/include/access/amapi.h | 17 +++++
src/include/access/genam.h | 9 +++
src/include/access/relscan.h | 13 ++++
src/include/c.h | 3 +
src/tools/pgindent/typedefs.list | 2 +
14 files changed, 262 insertions(+), 5 deletions(-)
(limited to 'doc/src')
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index 06077afed6..858798db85 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -138,6 +138,9 @@ blhandler(PG_FUNCTION_ARGS)
amroutine->amendscan = blendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index 40f201b11b..5d8e557460 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -131,6 +131,11 @@ typedef struct IndexAmRoutine
amendscan_function amendscan;
ammarkpos_function ammarkpos; /* can be NULL */
amrestrpos_function amrestrpos; /* can be NULL */
+
+ /* interface functions to support parallel index scans */
+ amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
+ aminitparallelscan_function aminitparallelscan; /* can be NULL */
+ amparallelrescan_function amparallelrescan; /* can be NULL */
} IndexAmRoutine;
@@ -624,6 +629,68 @@ amrestrpos (IndexScanDesc scan);
the amrestrpos> field in its IndexAmRoutine>
struct may be set to NULL.
+
+
+ In addition to supporting ordinary index scans, some types of index
+ may wish to support parallel index scans>, which allow
+ multiple backends to cooperate in performing an index scan. The
+ index access method should arrange things so that each cooperating
+ process returns a subset of the tuples that would be returned by
+ an ordinary, non-parallel index scan, but in such a way that the
+ union of those subsets is equal to the set of tuples that would be
+ returned by an ordinary, non-parallel index scan. Furthermore, while
+ there need not be any global ordering of tuples returned by a parallel
+ scan, the ordering of that subset of tuples returned within each
+ cooperating backend must match the requested ordering. The following
+ functions may be implemented to support parallel index scans:
+
+
+
+
+Size
+amestimateparallelscan (void);
+
+ Estimate and return the number of bytes of dynamic shared memory which
+ the access method will need to perform a parallel scan. (This number
+ is in addition to, not in lieu of, the amount of space needed for
+ AM-independent data in ParallelIndexScanDescData>.)
+
+
+
+ It is not necessary to implement this function for access methods which
+ do not support parallel scans or for which the number of additional bytes
+ of storage required is zero.
+
+
+
+
+void
+aminitparallelscan (void *target);
+
+ This function will be called to initialize dynamic shared memory at the
+ beginning of a parallel scan. target> will point to at least
+ the number of bytes previously returned by
+ amestimateparallelscan>, and this function may use that
+ amount of space to store whatever data it wishes.
+
+
+
+ It is not necessary to implement this function for access methods which
+ do not support parallel scans or in cases where the shared memory space
+ required needs no initialization.
+
+
+
+
+void
+amparallelrescan (IndexScanDesc scan);
+
+ This function, if implemented, will be called when a parallel index scan
+ must be restarted. It should reset any shared state set up by
+ aminitparallelscan> such that the scan will be restarted from
+ the beginning.
+
+
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index d60ddd242c..b2afdb7bed 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -112,6 +112,9 @@ brinhandler(PG_FUNCTION_ARGS)
amroutine->amendscan = brinendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 3909638906..02d920bb9d 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -68,6 +68,9 @@ ginhandler(PG_FUNCTION_ARGS)
amroutine->amendscan = ginendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 597056ae44..c2247ad2f7 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -89,6 +89,9 @@ gisthandler(PG_FUNCTION_ARGS)
amroutine->amendscan = gistendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index a64a9b9696..ec8ed33c70 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -86,6 +86,9 @@ hashhandler(PG_FUNCTION_ARGS)
amroutine->amendscan = hashendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 4822af95a3..ba27c1e86d 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -20,6 +20,10 @@
* index_insert - insert an index tuple into a relation
* index_markpos - mark a scan position
* index_restrpos - restore a scan position
+ * index_parallelscan_estimate - estimate shared memory for parallel scan
+ * index_parallelscan_initialize - initialize parallel scan
+ * index_parallelrescan - (re)start a parallel scan of an index
+ * index_beginscan_parallel - join parallel index scan
* index_getnext_tid - get the next TID from a scan
* index_fetch_heap - get the scan's next heap tuple
* index_getnext - get the next heap tuple from a scan
@@ -120,7 +124,8 @@ do { \
} while(0)
static IndexScanDesc index_beginscan_internal(Relation indexRelation,
- int nkeys, int norderbys, Snapshot snapshot);
+ int nkeys, int norderbys, Snapshot snapshot,
+ ParallelIndexScanDesc pscan, bool temp_snap);
/* ----------------------------------------------------------------
@@ -219,7 +224,7 @@ index_beginscan(Relation heapRelation,
{
IndexScanDesc scan;
- scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot);
+ scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false);
/*
* Save additional parameters into the scandesc. Everything else was set
@@ -244,7 +249,7 @@ index_beginscan_bitmap(Relation indexRelation,
{
IndexScanDesc scan;
- scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot);
+ scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot, NULL, false);
/*
* Save additional parameters into the scandesc. Everything else was set
@@ -260,8 +265,11 @@ index_beginscan_bitmap(Relation indexRelation,
*/
static IndexScanDesc
index_beginscan_internal(Relation indexRelation,
- int nkeys, int norderbys, Snapshot snapshot)
+ int nkeys, int norderbys, Snapshot snapshot,
+ ParallelIndexScanDesc pscan, bool temp_snap)
{
+ IndexScanDesc scan;
+
RELATION_CHECKS;
CHECK_REL_PROCEDURE(ambeginscan);
@@ -276,8 +284,13 @@ index_beginscan_internal(Relation indexRelation,
/*
* Tell the AM to open a scan.
*/
- return indexRelation->rd_amroutine->ambeginscan(indexRelation, nkeys,
+ scan = indexRelation->rd_amroutine->ambeginscan(indexRelation, nkeys,
norderbys);
+ /* Initialize information for parallel scan. */
+ scan->parallel_scan = pscan;
+ scan->xs_temp_snap = temp_snap;
+
+ return scan;
}
/* ----------------
@@ -341,6 +354,9 @@ index_endscan(IndexScanDesc scan)
/* Release index refcount acquired by index_beginscan */
RelationDecrementReferenceCount(scan->indexRelation);
+ if (scan->xs_temp_snap)
+ UnregisterSnapshot(scan->xs_snapshot);
+
/* Release the scan data structure itself */
IndexScanEnd(scan);
}
@@ -389,6 +405,115 @@ index_restrpos(IndexScanDesc scan)
scan->indexRelation->rd_amroutine->amrestrpos(scan);
}
+/*
+ * index_parallelscan_estimate - estimate shared memory for parallel scan
+ *
+ * Currently, we don't pass any information to the AM-specific estimator,
+ * so it can probably only return a constant. In the future, we might need
+ * to pass more information.
+ */
+Size
+index_parallelscan_estimate(Relation indexRelation, Snapshot snapshot)
+{
+ Size nbytes;
+
+ RELATION_CHECKS;
+
+ nbytes = offsetof(ParallelIndexScanDescData, ps_snapshot_data);
+ nbytes = add_size(nbytes, EstimateSnapshotSpace(snapshot));
+ nbytes = MAXALIGN(nbytes);
+
+ /*
+ * If amestimateparallelscan is not provided, assume there is no
+ * AM-specific data needed. (It's hard to believe that could work, but
+ * it's easy enough to cater to it here.)
+ */
+ if (indexRelation->rd_amroutine->amestimateparallelscan != NULL)
+ nbytes = add_size(nbytes,
+ indexRelation->rd_amroutine->amestimateparallelscan());
+
+ return nbytes;
+}
+
+/*
+ * index_parallelscan_initialize - initialize parallel scan
+ *
+ * We initialize both the ParallelIndexScanDesc proper and the AM-specific
+ * information which follows it.
+ *
+ * This function calls the access method specific initialization routine to
+ * initialize AM specific information.  Call this just once in the leader
+ * process; then, individual workers attach via index_beginscan_parallel.
+ */
+void
+index_parallelscan_initialize(Relation heapRelation, Relation indexRelation,
+ Snapshot snapshot, ParallelIndexScanDesc target)
+{
+ Size offset;
+
+ RELATION_CHECKS;
+
+ offset = add_size(offsetof(ParallelIndexScanDescData, ps_snapshot_data),
+ EstimateSnapshotSpace(snapshot));
+ offset = MAXALIGN(offset);
+
+ target->ps_relid = RelationGetRelid(heapRelation);
+ target->ps_indexid = RelationGetRelid(indexRelation);
+ target->ps_offset = offset;
+ SerializeSnapshot(snapshot, target->ps_snapshot_data);
+
+ /* aminitparallelscan is optional; assume no-op if not provided by AM */
+ if (indexRelation->rd_amroutine->aminitparallelscan != NULL)
+ {
+ void *amtarget;
+
+ amtarget = OffsetToPointer(target, offset);
+ indexRelation->rd_amroutine->aminitparallelscan(amtarget);
+ }
+}
+
+/* ----------------
+ * index_parallelrescan - (re)start a parallel scan of an index
+ * ----------------
+ */
+void
+index_parallelrescan(IndexScanDesc scan)
+{
+ SCAN_CHECKS;
+
+ /* amparallelrescan is optional; assume no-op if not provided by AM */
+ if (scan->indexRelation->rd_amroutine->amparallelrescan != NULL)
+ scan->indexRelation->rd_amroutine->amparallelrescan(scan);
+}
+
+/*
+ * index_beginscan_parallel - join parallel index scan
+ *
+ * Caller must be holding suitable locks on the heap and the index.
+ */
+IndexScanDesc
+index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys,
+ int norderbys, ParallelIndexScanDesc pscan)
+{
+ Snapshot snapshot;
+ IndexScanDesc scan;
+
+ Assert(RelationGetRelid(heaprel) == pscan->ps_relid);
+ snapshot = RestoreSnapshot(pscan->ps_snapshot_data);
+ RegisterSnapshot(snapshot);
+ scan = index_beginscan_internal(indexrel, nkeys, norderbys, snapshot,
+ pscan, true);
+
+ /*
+ * Save additional parameters into the scandesc. Everything else was set
+ * up by index_beginscan_internal.
+ */
+ scan->heapRelation = heaprel;
+ scan->xs_snapshot = snapshot;
+
+ return scan;
+}
+
/* ----------------
* index_getnext_tid - get the next TID from a scan
*
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 1bb1acfea6..469e7abe4d 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -118,6 +118,9 @@ bthandler(PG_FUNCTION_ARGS)
amroutine->amendscan = btendscan;
amroutine->ammarkpos = btmarkpos;
amroutine->amrestrpos = btrestrpos;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index ca4b0bdbe4..78846bec66 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -68,6 +68,9 @@ spghandler(PG_FUNCTION_ARGS)
amroutine->amendscan = spgendscan;
amroutine->ammarkpos = NULL;
amroutine->amrestrpos = NULL;
+ amroutine->amestimateparallelscan = NULL;
+ amroutine->aminitparallelscan = NULL;
+ amroutine->amparallelrescan = NULL;
PG_RETURN_POINTER(amroutine);
}
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 6a5f279e7f..e91e41dc0f 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -137,6 +137,18 @@ typedef void (*ammarkpos_function) (IndexScanDesc scan);
/* restore marked scan position */
typedef void (*amrestrpos_function) (IndexScanDesc scan);
+/*
+ * Callback function signatures - for parallel index scans.
+ */
+
+/* estimate size of parallel scan descriptor */
+typedef Size (*amestimateparallelscan_function) (void);
+
+/* prepare for parallel index scan */
+typedef void (*aminitparallelscan_function) (void *target);
+
+/* (re)start parallel index scan */
+typedef void (*amparallelrescan_function) (IndexScanDesc scan);
/*
* API struct for an index AM. Note this must be stored in a single palloc'd
@@ -196,6 +208,11 @@ typedef struct IndexAmRoutine
amendscan_function amendscan;
ammarkpos_function ammarkpos; /* can be NULL */
amrestrpos_function amrestrpos; /* can be NULL */
+
+ /* interface functions to support parallel index scans */
+ amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
+ aminitparallelscan_function aminitparallelscan; /* can be NULL */
+ amparallelrescan_function amparallelrescan; /* can be NULL */
} IndexAmRoutine;
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index b2e078aed2..51466b96e8 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -83,6 +83,8 @@ typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state);
typedef struct IndexScanDescData *IndexScanDesc;
typedef struct SysScanDescData *SysScanDesc;
+typedef struct ParallelIndexScanDescData *ParallelIndexScanDesc;
+
/*
* Enumeration specifying the type of uniqueness check to perform in
* index_insert().
@@ -144,6 +146,13 @@ extern void index_rescan(IndexScanDesc scan,
extern void index_endscan(IndexScanDesc scan);
extern void index_markpos(IndexScanDesc scan);
extern void index_restrpos(IndexScanDesc scan);
+extern Size index_parallelscan_estimate(Relation indexrel, Snapshot snapshot);
+extern void index_parallelscan_initialize(Relation heaprel, Relation indexrel,
+ Snapshot snapshot, ParallelIndexScanDesc target);
+extern void index_parallelrescan(IndexScanDesc scan);
+extern IndexScanDesc index_beginscan_parallel(Relation heaprel,
+ Relation indexrel, int nkeys, int norderbys,
+ ParallelIndexScanDesc pscan);
extern ItemPointer index_getnext_tid(IndexScanDesc scan,
ScanDirection direction);
extern HeapTuple index_fetch_heap(IndexScanDesc scan);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 8746045d8d..ce3ca8d4ac 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -93,6 +93,7 @@ typedef struct IndexScanDescData
ScanKey keyData; /* array of index qualifier descriptors */
ScanKey orderByData; /* array of ordering op descriptors */
bool xs_want_itup; /* caller requests index tuples */
+ bool xs_temp_snap; /* unregister snapshot at scan end? */
/* signaling to index AM about killing index tuples */
bool kill_prior_tuple; /* last-returned tuple is dead */
@@ -126,8 +127,20 @@ typedef struct IndexScanDescData
/* state data for traversing HOT chains in index_getnext */
bool xs_continue_hot; /* T if must keep walking HOT chain */
+
+ /* parallel index scan information, in shared memory */
+ ParallelIndexScanDesc parallel_scan;
} IndexScanDescData;
+/* Generic structure for parallel scans */
+typedef struct ParallelIndexScanDescData
+{
+ Oid ps_relid;
+ Oid ps_indexid;
+ Size ps_offset; /* Offset in bytes of am specific structure */
+ char ps_snapshot_data[FLEXIBLE_ARRAY_MEMBER];
+} ParallelIndexScanDescData;
+
/* Struct for heap-or-index scans of system tables */
typedef struct SysScanDescData
{
diff --git a/src/include/c.h b/src/include/c.h
index efbb77f540..a2c043adfb 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -527,6 +527,9 @@ typedef NameData *Name;
#define PointerIsAligned(pointer, type) \
(((uintptr_t)(pointer) % (sizeof (type))) == 0)
+#define OffsetToPointer(base, offset) \
+ ((void *)((char *) base + offset))
+
#define OidIsValid(objectId) ((bool) ((objectId) != InvalidOid))
#define RegProcedureIsValid(p) OidIsValid(p)
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 993880da43..c4235ae63a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1264,6 +1264,8 @@ OverrideSearchPath
OverrideStackEntry
PACE_HEADER
PACL
+ParallelIndexScanDesc
+ParallelIndexScanDescData
PATH
PBOOL
PCtxtHandle
--
cgit v1.2.3
From d1ecd539477fe640455dc890216a7c1561e047b4 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 24 Jan 2017 16:59:18 -0500
Subject: Add a SHOW command to the replication command language.
This is useful infrastructure for an upcoming proposed patch to
allow the WAL segment size to be changed at initdb time; tools like
pg_basebackup need the ability to interrogate the server setting.
But it also doesn't seem like a bad thing to have independently of
that; it may find other uses in the future.
Robert Haas and Beena Emerson. (The original patch here was by
Beena, but I rewrote it to such a degree that most of the code
being committed here is mine.)
Discussion: http://postgr.es/m/CA+TgmobNo4qz06wHEmy9DszAre3dYx-WNhHSCbU9SAwf+9Ft6g@mail.gmail.com
---
doc/src/sgml/protocol.sgml | 24 +++++++++++
src/backend/access/common/tupdesc.c | 79 ++++++++++++++++++++++++++++++++++
src/backend/replication/repl_gram.y | 22 +++++++++-
src/backend/replication/repl_scanner.l | 1 +
src/backend/replication/walsender.c | 12 ++++++
src/backend/utils/misc/guc.c | 16 +++----
src/include/access/tupdesc.h | 7 +++
7 files changed, 151 insertions(+), 10 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 5f89db5857..028ef10d91 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1393,6 +1393,30 @@ The commands accepted in walsender mode are:
+
+ SHOWname
+ SHOW
+
+
+
+ Requests the server to send the current setting of a run-time parameter.
+ This is similar to the SQL command .
+
+
+
+
+ name>
+
+
+ The name of a run-time parameter. Available parameters are documented
+ in .
+
+
+
+
+
+
+
TIMELINE_HISTORYtliTIMELINE_HISTORY
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 54a32c0223..083c0303dc 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -20,6 +20,7 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "parser/parse_type.h"
@@ -553,6 +554,84 @@ TupleDescInitEntry(TupleDesc desc,
ReleaseSysCache(tuple);
}
+/*
+ * TupleDescInitBuiltinEntry
+ * Initialize a tuple descriptor without catalog access. Only
+ * a limited range of builtin types are supported.
+ */
+void
+TupleDescInitBuiltinEntry(TupleDesc desc,
+ AttrNumber attributeNumber,
+ const char *attributeName,
+ Oid oidtypeid,
+ int32 typmod,
+ int attdim)
+{
+ Form_pg_attribute att;
+
+ /* sanity checks */
+ AssertArg(PointerIsValid(desc));
+ AssertArg(attributeNumber >= 1);
+ AssertArg(attributeNumber <= desc->natts);
+
+ /* initialize the attribute fields */
+ att = desc->attrs[attributeNumber - 1];
+ att->attrelid = 0; /* dummy value */
+
+ /* unlike TupleDescInitEntry, we require an attribute name */
+ Assert(attributeName != NULL);
+ namestrcpy(&(att->attname), attributeName);
+
+ att->attstattarget = -1;
+ att->attcacheoff = -1;
+ att->atttypmod = typmod;
+
+ att->attnum = attributeNumber;
+ att->attndims = attdim;
+
+ att->attnotnull = false;
+ att->atthasdef = false;
+ att->attisdropped = false;
+ att->attislocal = true;
+ att->attinhcount = 0;
+ /* attacl, attoptions and attfdwoptions are not present in tupledescs */
+
+ att->atttypid = oidtypeid;
+
+ /*
+ * Our goal here is to support just enough types to let basic builtin
+ * commands work without catalog access - e.g. so that we can do certain
+ * things even in processes that are not connected to a database.
+ */
+ switch (oidtypeid)
+ {
+ case TEXTOID:
+ case TEXTARRAYOID:
+ att->attlen = -1;
+ att->attbyval = false;
+ att->attalign = 'i';
+ att->attstorage = 'x';
+ att->attcollation = DEFAULT_COLLATION_OID;
+ break;
+
+ case BOOLOID:
+ att->attlen = 1;
+ att->attbyval = true;
+ att->attalign = 'c';
+ att->attstorage = 'p';
+ att->attcollation = InvalidOid;
+ break;
+
+ case INT4OID:
+ att->attlen = 4;
+ att->attbyval = true;
+ att->attalign = 'i';
+ att->attstorage = 'p';
+ att->attcollation = InvalidOid;
+ break;
+ }
+}
+
/*
* TupleDescInitEntryCollation
*
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index d962c76819..b35d0f0cd1 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -61,6 +61,7 @@ Node *replication_parse_result;
/* Keyword tokens. */
%token K_BASE_BACKUP
%token K_IDENTIFY_SYSTEM
+%token K_SHOW
%token K_START_REPLICATION
%token K_CREATE_REPLICATION_SLOT
%token K_DROP_REPLICATION_SLOT
@@ -82,14 +83,14 @@ Node *replication_parse_result;
%type command
%type base_backup start_replication start_logical_replication
create_replication_slot drop_replication_slot identify_system
- timeline_history
+ timeline_history show
%type base_backup_opt_list
%type base_backup_opt
%type opt_timeline
%type plugin_options plugin_opt_list
%type plugin_opt_elem
%type plugin_opt_arg
-%type opt_slot
+%type opt_slot var_name
%type opt_reserve_wal opt_temporary
%%
@@ -112,6 +113,7 @@ command:
| create_replication_slot
| drop_replication_slot
| timeline_history
+ | show
;
/*
@@ -124,6 +126,22 @@ identify_system:
}
;
+/*
+ * SHOW setting
+ */
+show:
+ K_SHOW var_name
+ {
+ VariableShowStmt *n = makeNode(VariableShowStmt);
+ n->name = $2;
+ $$ = (Node *) n;
+ }
+ ;
+var_name: IDENT { $$ = $1; }
+ | var_name '.' IDENT
+ { $$ = psprintf("%s.%s", $1, $3); }
+ ;
+
/*
* BASE_BACKUP [LABEL '
+
+ passfile
+
+
+ Specifies the name of the file used to store passwords
+ (see ).
+ Defaults to ~/.pgpass, or
+ %APPDATA%\postgresql\pgpass.conf> on Microsoft Windows.
+ (No error is reported if this file does not exist.)
+
+
+
+
connect_timeout
@@ -6893,8 +6906,8 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
Use of this environment variable
is not recommended for security reasons, as some operating systems
allow non-root users to see process environment variables via
- ps>; instead consider using the
- ~/.pgpass> file (see ).
+ ps>; instead consider using a password file
+ (see ).
@@ -6903,9 +6916,8 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
PGPASSFILE
- PGPASSFILE specifies the name of the password file to
- use for lookups. If not set, it defaults to ~/.pgpass>
- (see ).
+ PGPASSFILE behaves the same as the connection parameter.
@@ -7187,13 +7199,16 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
- The file .pgpass in a user's home directory or the
- file referenced by PGPASSFILE can contain passwords to
+ The file .pgpass in a user's home directory can
+ contain passwords to
be used if the connection requires a password (and no password has been
specified otherwise). On Microsoft Windows the file is named
%APPDATA%\postgresql\pgpass.conf> (where
%APPDATA%> refers to the Application Data subdirectory in
the user's profile).
+ Alternatively, a password file can be specified
+ using the connection parameter
+ or the environment variable PGPASSFILE.
@@ -7219,8 +7234,8 @@ myEventProc(PGEventId evtId, void *evtInfo, void *passThrough)
- On Unix systems, the permissions on .pgpass must
- disallow any access to world or group; achieve this by the command
+ On Unix systems, the permissions on a password file must
+ disallow any access to world or group; achieve this by a command such as
chmod 0600 ~/.pgpass. If the permissions are less
strict than this, the file will be ignored. On Microsoft Windows, it
is assumed that the file is stored in a directory that is secure, so
diff --git a/src/interfaces/libpq/fe-auth.c b/src/interfaces/libpq/fe-auth.c
index fe1e276f56..2845d3b9d2 100644
--- a/src/interfaces/libpq/fe-auth.c
+++ b/src/interfaces/libpq/fe-auth.c
@@ -686,11 +686,12 @@ pg_fe_sendauth(AuthRequest areq, PGconn *conn)
case AUTH_REQ_MD5:
case AUTH_REQ_PASSWORD:
{
- char *password = conn->connhost[conn->whichhost].password;
+ char *password;
+ conn->password_needed = true;
+ password = conn->connhost[conn->whichhost].password;
if (password == NULL)
password = conn->pgpass;
- conn->password_needed = true;
if (password == NULL || password[0] == '\0')
{
printfPQExpBuffer(&conn->errorMessage,
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index e13fc98249..0dda1804a5 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -107,7 +107,6 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
#define DefaultTty ""
#define DefaultOption ""
#define DefaultAuthtype ""
-#define DefaultPassword ""
#define DefaultTargetSessionAttrs "any"
#ifdef USE_SSL
#define DefaultSSLMode "prefer"
@@ -185,6 +184,10 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
"Database-Password", "*", 20,
offsetof(struct pg_conn, pgpass)},
+ {"passfile", "PGPASSFILE", NULL, NULL,
+ "Database-Password-File", "", 64,
+ offsetof(struct pg_conn, pgpassfile)},
+
{"connect_timeout", "PGCONNECT_TIMEOUT", NULL, NULL,
"Connect-timeout", "", 10, /* strlen(INT32_MAX) == 10 */
offsetof(struct pg_conn, connect_timeout)},
@@ -382,10 +385,9 @@ static int parseServiceFile(const char *serviceFile,
PQExpBuffer errorMessage,
bool *group_found);
static char *pwdfMatchesString(char *buf, char *token);
-static char *PasswordFromFile(char *hostname, char *port, char *dbname,
- char *username);
-static bool getPgPassFilename(char *pgpassfile);
-static void dot_pg_pass_warning(PGconn *conn);
+static char *passwordFromFile(char *hostname, char *port, char *dbname,
+ char *username, char *pgpassfile);
+static void pgpassfileWarning(PGconn *conn);
static void default_threadlock(int acquire);
@@ -957,19 +959,40 @@ connectOptions2(PGconn *conn)
{
int i;
- if (conn->pgpass)
- free(conn->pgpass);
- conn->pgpass = strdup(DefaultPassword);
- if (!conn->pgpass)
- goto oom_error;
- for (i = 0; i < conn->nconnhost; ++i)
+ if (conn->pgpassfile == NULL || conn->pgpassfile[0] == '\0')
+ {
+ /* Identify password file to use; fail if we can't */
+ char homedir[MAXPGPATH];
+
+ if (!pqGetHomeDirectory(homedir, sizeof(homedir)))
+ {
+ conn->status = CONNECTION_BAD;
+ printfPQExpBuffer(&conn->errorMessage,
+ libpq_gettext("could not get home directory to locate password file\n"));
+ return false;
+ }
+
+ if (conn->pgpassfile)
+ free(conn->pgpassfile);
+ conn->pgpassfile = malloc(MAXPGPATH);
+ if (!conn->pgpassfile)
+ goto oom_error;
+
+ snprintf(conn->pgpassfile, MAXPGPATH, "%s/%s", homedir, PGPASSFILE);
+ }
+
+ for (i = 0; i < conn->nconnhost; i++)
{
+ /* Try to get a password for this host from pgpassfile */
conn->connhost[i].password =
- PasswordFromFile(conn->connhost[i].host,
+ passwordFromFile(conn->connhost[i].host,
conn->connhost[i].port,
- conn->dbName, conn->pguser);
+ conn->dbName,
+ conn->pguser,
+ conn->pgpassfile);
+ /* If we got one, set pgpassfile_used */
if (conn->connhost[i].password != NULL)
- conn->dot_pgpass_used = true;
+ conn->pgpassfile_used = true;
}
}
@@ -3016,7 +3039,7 @@ keep_going: /* We will come back to here until there is
error_return:
- dot_pg_pass_warning(conn);
+ pgpassfileWarning(conn);
/*
* We used to close the socket at this point, but that makes it awkward
@@ -3147,7 +3170,7 @@ makeEmptyPGconn(void)
conn->sock = PGINVALID_SOCKET;
conn->auth_req_received = false;
conn->password_needed = false;
- conn->dot_pgpass_used = false;
+ conn->pgpassfile_used = false;
#ifdef USE_SSL
conn->allow_ssl_try = true;
conn->wait_ssl_try = false;
@@ -3256,6 +3279,8 @@ freePGconn(PGconn *conn)
free(conn->pguser);
if (conn->pgpass)
free(conn->pgpass);
+ if (conn->pgpassfile)
+ free(conn->pgpassfile);
if (conn->keepalives)
free(conn->keepalives);
if (conn->keepalives_idle)
@@ -5794,6 +5819,9 @@ PQpass(const PGconn *conn)
password = conn->connhost[conn->whichhost].password;
if (password == NULL)
password = conn->pgpass;
+ /* Historically we've returned "" not NULL for no password specified */
+ if (password == NULL)
+ password = "";
return password;
}
@@ -6160,10 +6188,10 @@ pwdfMatchesString(char *buf, char *token)
/* Get a password from the password file. Return value is malloc'd. */
static char *
-PasswordFromFile(char *hostname, char *port, char *dbname, char *username)
+passwordFromFile(char *hostname, char *port, char *dbname,
+ char *username, char *pgpassfile)
{
FILE *fp;
- char pgpassfile[MAXPGPATH];
struct stat stat_buf;
#define LINELEN NAMEDATALEN*5
@@ -6190,9 +6218,6 @@ PasswordFromFile(char *hostname, char *port, char *dbname, char *username)
if (port == NULL)
port = DEF_PGPORT_STR;
- if (!getPgPassFilename(pgpassfile))
- return NULL;
-
/* If password file cannot be opened, ignore it. */
if (stat(pgpassfile, &stat_buf) != 0)
return NULL;
@@ -6286,46 +6311,23 @@ PasswordFromFile(char *hostname, char *port, char *dbname, char *username)
}
-static bool
-getPgPassFilename(char *pgpassfile)
-{
- char *passfile_env;
-
- if ((passfile_env = getenv("PGPASSFILE")) != NULL)
- /* use the literal path from the environment, if set */
- strlcpy(pgpassfile, passfile_env, MAXPGPATH);
- else
- {
- char homedir[MAXPGPATH];
-
- if (!pqGetHomeDirectory(homedir, sizeof(homedir)))
- return false;
- snprintf(pgpassfile, MAXPGPATH, "%s/%s", homedir, PGPASSFILE);
- }
- return true;
-}
-
/*
* If the connection failed, we should mention if
- * we got the password from .pgpass in case that
+ * we got the password from the pgpassfile in case that
* password is wrong.
*/
static void
-dot_pg_pass_warning(PGconn *conn)
+pgpassfileWarning(PGconn *conn)
{
- /* If it was 'invalid authorization', add .pgpass mention */
+ /* If it was 'invalid authorization', add pgpassfile mention */
/* only works with >= 9.0 servers */
- if (conn->dot_pgpass_used && conn->password_needed && conn->result &&
+ if (conn->pgpassfile_used && conn->password_needed && conn->result &&
strcmp(PQresultErrorField(conn->result, PG_DIAG_SQLSTATE),
ERRCODE_INVALID_PASSWORD) == 0)
{
- char pgpassfile[MAXPGPATH];
-
- if (!getPgPassFilename(pgpassfile))
- return;
appendPQExpBuffer(&conn->errorMessage,
libpq_gettext("password retrieved from file \"%s\"\n"),
- pgpassfile);
+ conn->pgpassfile);
}
}
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index 7289bd15c0..c655388864 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -343,6 +343,7 @@ struct pg_conn
char *replication; /* connect as the replication standby? */
char *pguser; /* Postgres username and password, if any */
char *pgpass;
+ char *pgpassfile; /* path to a file containing password(s) */
char *keepalives; /* use TCP keepalives? */
char *keepalives_idle; /* time between TCP keepalives */
char *keepalives_interval; /* time between TCP keepalive
@@ -407,7 +408,7 @@ struct pg_conn
bool auth_req_received; /* true if any type of auth req
* received */
bool password_needed; /* true if server demanded a password */
- bool dot_pgpass_used; /* true if used .pgpass */
+ bool pgpassfile_used; /* true if password is from pgpassfile */
bool sigpipe_so; /* have we masked SIGPIPE via SO_NOSIGPIPE? */
bool sigpipe_flag; /* can we mask SIGPIPE via MSG_NOSIGNAL? */
--
cgit v1.2.3
From 123f03ba2c6e2d85a5a900e79dd5f216bfb37e25 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 25 Jan 2017 08:59:24 -0500
Subject: doc: Update ALTER SEQUENCE documentation to match
Update documentation to match change in
0bc1207aeb3de951bf95a9e9899b1256216d65f5.
---
doc/src/sgml/ref/alter_sequence.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_sequence.sgml b/doc/src/sgml/ref/alter_sequence.sgml
index 47d3c8291f..307366526f 100644
--- a/doc/src/sgml/ref/alter_sequence.sgml
+++ b/doc/src/sgml/ref/alter_sequence.sgml
@@ -102,7 +102,7 @@ ALTER SEQUENCE [ IF EXISTS ] name S
class="parameter">minvalue determines
the minimum value a sequence can generate. If NO
MINVALUE is specified, the defaults of 1 and
- -263>-1 for ascending and descending sequences,
+ -263> for ascending and descending sequences,
respectively, will be used. If neither option is specified,
the current minimum value will be maintained.
--
cgit v1.2.3
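The corrected default is easy to confirm on a version 10 server, where the
new pg_sequence catalog exposes a sequence's bounds. A small sketch (the
sequence name is illustrative):

    -- descending sequence with no explicit MINVALUE
    CREATE SEQUENCE countdown INCREMENT BY -1;

    SELECT seqmin, seqmax
    FROM pg_sequence
    WHERE seqrelid = 'countdown'::regclass;
    -- seqmin should be -9223372036854775808 (that is, -2^63), seqmax -1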
From 1e7c4bb0049732ece651d993d03bb6772e5d281a Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 25 Jan 2017 09:17:18 -0500
Subject: Change unknown-type literals to type text in SELECT and RETURNING
lists.
Previously, we left such literals alone if the query or subquery had
no properties forcing a type decision to be made (such as an ORDER BY or
DISTINCT clause using that output column). This meant that "unknown" could
be an exposed output column type, which has never been a great idea because
it could result in strange failures later on. For example, an outer query
that tried to do any operations on an unknown-type subquery output would
generally fail with some weird error like "failed to find conversion
function from unknown to text" or "could not determine which collation to
use for string comparison". Also, if the case occurred in a CREATE VIEW's
query then the view would have an unknown-type column, causing similar
failures in queries trying to use the view.
To fix, at the tail end of parse analysis of a query, forcibly convert any
remaining "unknown" literals in its SELECT or RETURNING list to type text.
However, provide a switch to suppress that, and use it in the cases of
SELECT inside a set operation or INSERT command. In those cases we already
had type resolution rules that make use of context information from outside
the subquery proper, and we don't want to change that behavior.
Also, change creation of an unknown-type column in a relation from a
warning to a hard error. The error should be unreachable now in CREATE
VIEW or CREATE MATVIEW, but it's still possible to explicitly say "unknown"
in CREATE TABLE or CREATE (composite) TYPE. We want to forbid that because
it's nothing but a foot-gun.
This change creates a pg_upgrade failure case: a matview that contains an
unknown-type column can't be pg_upgraded, because reparsing the matview's
defining query will now decide that the column is of type text, which
doesn't match the cstring-like storage that the old materialized column
would actually have. Add a checking pass to detect that. While at it,
we can detect tables or composite types that would fail, essentially
for free. Those would fail safely anyway later on, but we might as
well fail earlier.
This patch is by me, but it owes something to previous investigations
by Rahila Syed. Also thanks to Ashutosh Bapat and Michael Paquier for
review.
Discussion: https://postgr.es/m/CAH2L28uwwbL9HUM-WR=hromW1Cvamkn7O-g8fPY2m=_7muJ0oA@mail.gmail.com
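In SQL terms, the new rule plays out as follows, mirroring the regression
tests added below (names are illustrative):

    -- an unspecified-type literal in a bare SELECT output list is now
    -- resolved as text rather than left as "unknown"
    SELECT f1, pg_typeof(f1)
    FROM (SELECT 'foo' AS f1) ss;       -- pg_typeof reports text

    -- set operations keep their own resolution rules, so here '42' is
    -- resolved from the other UNION arm as an integer, not forced to text
    SELECT '42' UNION ALL SELECT 43;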
---
doc/src/sgml/ref/create_view.sgml | 7 +-
doc/src/sgml/typeconv.sgml | 50 +++++++++++-
src/backend/catalog/heap.c | 14 +---
src/backend/parser/analyze.c | 29 ++++++-
src/backend/parser/parse_clause.c | 3 +-
src/backend/parser/parse_cte.c | 11 ++-
src/backend/parser/parse_expr.c | 2 +-
src/backend/parser/parse_node.c | 1 +
src/backend/parser/parse_target.c | 33 +++++++-
src/bin/pg_upgrade/check.c | 4 +
src/bin/pg_upgrade/pg_upgrade.h | 1 +
src/bin/pg_upgrade/version.c | 97 ++++++++++++++++++++++++
src/include/parser/analyze.h | 3 +-
src/include/parser/parse_node.h | 5 ++
src/include/parser/parse_target.h | 1 +
src/test/regress/expected/create_table.out | 8 ++
src/test/regress/expected/create_view.out | 26 +++++++
src/test/regress/expected/matview.out | 27 +++++++
src/test/regress/expected/subselect.out | 31 ++++++++
src/test/regress/expected/with.out | 20 ++++-
src/test/regress/output/create_function_1.source | 2 +-
src/test/regress/sql/create_table.sql | 8 ++
src/test/regress/sql/create_view.sql | 8 ++
src/test/regress/sql/matview.sql | 8 ++
src/test/regress/sql/subselect.sql | 10 +++
src/test/regress/sql/with.sql | 13 +++-
26 files changed, 386 insertions(+), 36 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/create_view.sgml b/doc/src/sgml/ref/create_view.sgml
index 8641e1925e..a83d9564e5 100644
--- a/doc/src/sgml/ref/create_view.sgml
+++ b/doc/src/sgml/ref/create_view.sgml
@@ -251,9 +251,10 @@ CREATE VIEW [ schema> . ] view_name> AS WITH RECUR
CREATE VIEW vista AS SELECT 'Hello World';
- is bad form in two ways: the column name defaults to ?column?>,
- and the column data type defaults to unknown>. If you want a
- string literal in a view's result, use something like:
+ is bad form because the column name defaults to ?column?>;
+ also, the column data type defaults to text>, which might not
+ be what you wanted. Better style for a string literal in a view's
+ result is something like:
CREATE VIEW vista AS SELECT text 'Hello World' AS hello;
diff --git a/doc/src/sgml/typeconv.sgml b/doc/src/sgml/typeconv.sgml
index c031c01ed3..63d41f03f3 100644
--- a/doc/src/sgml/typeconv.sgml
+++ b/doc/src/sgml/typeconv.sgml
@@ -984,7 +984,8 @@ domain's base type for all subsequent steps.
If all inputs are of type unknown, resolve as type
text (the preferred type of the string category).
-Otherwise, unknown inputs are ignored.
+Otherwise, unknown inputs are ignored for the purposes
+of the remaining rules.
@@ -1076,6 +1077,53 @@ but integer> can be implicitly cast to real>, the union
result type is resolved as real>.
+
+
+
+SELECT Output Columns
+
+
+ SELECT
+ determination of result type
+
+
+
+The rules given in the preceding sections will result in assignment
+of non-unknown> data types to all expressions in a SQL query,
+except for unspecified-type literals that appear as simple output
+columns of a SELECT> command. For example, in
+
+
+SELECT 'Hello World';
+
+
+there is nothing to identify what type the string literal should be
+taken as. In this situation PostgreSQL> will fall back
+to resolving the literal's type as text>.
+
+
+
+When the SELECT> is one arm of a UNION>
+(or INTERSECT> or EXCEPT>) construct, or when it
+appears within INSERT ... SELECT>, this rule is not applied
+since rules given in preceding sections take precedence. The type of an
+unspecified-type literal can be taken from the other UNION> arm
+in the first case, or from the destination column in the second case.
+
+
+
+RETURNING> lists are treated the same as SELECT>
+output lists for this purpose.
+
+
+
+
+ Prior to PostgreSQL> 10, this rule did not exist, and
+ unspecified-type literals in a SELECT> output list were
+ left as type unknown>. That had assorted bad consequences,
+ so it's been changed.
+
+
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index a56ddbbd3b..d7a3513d29 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -490,18 +490,8 @@ CheckAttributeType(const char *attname,
char att_typtype = get_typtype(atttypid);
Oid att_typelem;
- if (atttypid == UNKNOWNOID)
- {
- /*
- * Warn user, but don't fail, if column to be created has UNKNOWN type
- * (usually as a result of a 'retrieve into' - jolly)
- */
- ereport(WARNING,
- (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
- errmsg("column \"%s\" has type %s", attname, "unknown"),
- errdetail("Proceeding with relation creation anyway.")));
- }
- else if (att_typtype == TYPTYPE_PSEUDO)
+ if (atttypid == UNKNOWNOID ||
+ att_typtype == TYPTYPE_PSEUDO)
{
/*
* Refuse any attempt to create a pseudo-type column, except for a
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index a02a77a03a..f954dc15f0 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -156,13 +156,15 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
Query *
parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
CommonTableExpr *parentCTE,
- bool locked_from_parent)
+ bool locked_from_parent,
+ bool resolve_unknowns)
{
ParseState *pstate = make_parsestate(parentParseState);
Query *query;
pstate->p_parent_cte = parentCTE;
pstate->p_locked_from_parent = locked_from_parent;
+ pstate->p_resolve_unknowns = resolve_unknowns;
query = transformStmt(pstate, parseTree);
@@ -570,10 +572,17 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
* otherwise the behavior of SELECT within INSERT might be different
* from a stand-alone SELECT. (Indeed, Postgres up through 6.5 had
* bugs of just that nature...)
+ *
+ * The sole exception is that we prevent resolving unknown-type
+ * outputs as TEXT. This does not change the semantics since if the
+ * column type matters semantically, it would have been resolved to
+ * something else anyway. Doing this lets us resolve such outputs as
+ * the target column's type, which we handle below.
*/
sub_pstate->p_rtable = sub_rtable;
sub_pstate->p_joinexprs = NIL; /* sub_rtable has no joins */
sub_pstate->p_namespace = sub_namespace;
+ sub_pstate->p_resolve_unknowns = false;
selectQuery = transformStmt(sub_pstate, stmt->selectStmt);
@@ -1269,6 +1278,10 @@ transformSelectStmt(ParseState *pstate, SelectStmt *stmt)
pstate->p_windowdefs,
&qry->targetList);
+ /* resolve any still-unresolved output columns as being type text */
+ if (pstate->p_resolve_unknowns)
+ resolveTargetListUnknowns(pstate, qry->targetList);
+
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
@@ -1843,11 +1856,19 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
/*
* Transform SelectStmt into a Query.
*
+ * This works the same as SELECT transformation normally would, except
+ * that we prevent resolving unknown-type outputs as TEXT. This does
+ * not change the subquery's semantics since if the column type
+ * matters semantically, it would have been resolved to something else
+ * anyway. Doing this lets us resolve such outputs using
+ * select_common_type(), below.
+ *
* Note: previously transformed sub-queries don't affect the parsing
* of this sub-query, because they are not in the toplevel pstate's
* namespace list.
*/
- selectQuery = parse_sub_analyze((Node *) stmt, pstate, NULL, false);
+ selectQuery = parse_sub_analyze((Node *) stmt, pstate,
+ NULL, false, false);
/*
* Check for bogus references to Vars on the current query level (but
@@ -2350,6 +2371,10 @@ transformReturningList(ParseState *pstate, List *returningList)
/* mark column origins */
markTargetListOrigins(pstate, rlist);
+ /* resolve any still-unresolved output columns as being type text */
+ if (pstate->p_resolve_unknowns)
+ resolveTargetListUnknowns(pstate, rlist);
+
/* restore state */
pstate->p_next_resno = save_next_resno;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 624ab41371..4769e78620 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -471,7 +471,8 @@ transformRangeSubselect(ParseState *pstate, RangeSubselect *r)
* Analyze and transform the subquery.
*/
query = parse_sub_analyze(r->subquery, pstate, NULL,
- isLockedRefname(pstate, r->alias->aliasname));
+ isLockedRefname(pstate, r->alias->aliasname),
+ true);
/* Restore state */
pstate->p_lateral_active = false;
diff --git a/src/backend/parser/parse_cte.c b/src/backend/parser/parse_cte.c
index fc8c15b268..dfbcaa2cdc 100644
--- a/src/backend/parser/parse_cte.c
+++ b/src/backend/parser/parse_cte.c
@@ -241,7 +241,7 @@ analyzeCTE(ParseState *pstate, CommonTableExpr *cte)
/* Analysis not done already */
Assert(!IsA(cte->ctequery, Query));
- query = parse_sub_analyze(cte->ctequery, pstate, cte, false);
+ query = parse_sub_analyze(cte->ctequery, pstate, cte, false, true);
cte->ctequery = (Node *) query;
/*
@@ -393,11 +393,10 @@ analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist)
/*
* If the CTE is recursive, force the exposed column type of any
- * "unknown" column to "text". This corresponds to the fact that
- * SELECT 'foo' UNION SELECT 'bar' will ultimately produce text. We
- * might see "unknown" as a result of an untyped literal in the
- * non-recursive term's select list, and if we don't convert to text
- * then we'll have a mismatch against the UNION result.
+ * "unknown" column to "text". We must deal with this here because
+ * we're called on the non-recursive term before there's been any
+ * attempt to force unknown output columns to some other type. We
+ * have to resolve unknowns before looking at the recursive term.
*
* The column might contain 'foo' COLLATE "bar", so don't override
* collation if it's already set.
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index add3be6566..c43ef19df5 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1846,7 +1846,7 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
/*
* OK, let's transform the sub-SELECT.
*/
- qtree = parse_sub_analyze(sublink->subselect, pstate, NULL, false);
+ qtree = parse_sub_analyze(sublink->subselect, pstate, NULL, false, true);
/*
* Check that we got a SELECT. Anything else should be impossible given
diff --git a/src/backend/parser/parse_node.c b/src/backend/parser/parse_node.c
index 73e7d65c35..2a5f147ca1 100644
--- a/src/backend/parser/parse_node.c
+++ b/src/backend/parser/parse_node.c
@@ -51,6 +51,7 @@ make_parsestate(ParseState *parentParseState)
/* Fill in fields that don't start at null/false/zero */
pstate->p_next_resno = 1;
+ pstate->p_resolve_unknowns = true;
if (parentParseState)
{
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index 081a8dd468..2576e31239 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -288,13 +288,42 @@ transformExpressionList(ParseState *pstate, List *exprlist,
}
+/*
+ * resolveTargetListUnknowns()
+ * Convert any unknown-type targetlist entries to type TEXT.
+ *
+ * We do this after we've exhausted all other ways of identifying the output
+ * column types of a query.
+ */
+void
+resolveTargetListUnknowns(ParseState *pstate, List *targetlist)
+{
+ ListCell *l;
+
+ foreach(l, targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+ Oid restype = exprType((Node *) tle->expr);
+
+ if (restype == UNKNOWNOID)
+ {
+ tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr,
+ restype, TEXTOID, -1,
+ COERCION_IMPLICIT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ }
+ }
+}
+
+
/*
* markTargetListOrigins()
* Mark targetlist columns that are simple Vars with the source
* table's OID and column number.
*
- * Currently, this is done only for SELECT targetlists, since we only
- * need the info if we are going to send it to the frontend.
+ * Currently, this is done only for SELECT targetlists and RETURNING lists,
+ * since we only need the info if we are going to send it to the frontend.
*/
void
markTargetListOrigins(ParseState *pstate, List *targetlist)
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index b6efad429a..ff1d7a207a 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -99,6 +99,10 @@ check_and_dump_old_cluster(bool live_check)
check_for_reg_data_type_usage(&old_cluster);
check_for_isn_and_int8_passing_mismatch(&old_cluster);
+ /* Pre-PG 10 allowed tables with 'unknown' type columns */
+ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906)
+ old_9_6_check_for_unknown_data_type_usage(&old_cluster);
+
/* 9.5 and below should not have roles starting with pg_ */
if (GET_MAJOR_VERSION(old_cluster.major_version) <= 905)
check_for_pg_role_prefix(&old_cluster);
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 42e7aebb01..968ab8a668 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -442,6 +442,7 @@ void pg_putenv(const char *var, const char *val);
void new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster,
bool check_mode);
void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
+void old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster);
/* parallel.c */
void parallel_exec_prog(const char *log_file, const char *opt_log_file,
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index fb56feabd9..aa462daed3 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -185,3 +185,100 @@ old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
else
check_ok();
}
+
+
+/*
+ * old_9_6_check_for_unknown_data_type_usage()
+ * 9.6 -> 10
+ * It's no longer allowed to create tables or views with "unknown"-type
+ * columns. We do not complain about views with such columns, because
+ * they should get silently converted to "text" columns during the DDL
+ * dump and reload; it seems unlikely to be worth making users do that
+ * by hand. However, if there's a table with such a column, the DDL
+ * reload will fail, so we should pre-detect that rather than failing
+ * mid-upgrade. Worse, if there's a matview with such a column, the
+ * DDL reload will silently change it to "text" which won't match the
+ * on-disk storage (which is like "cstring"). So we *must* reject that.
+ * Also check composite types, in case they are used for table columns.
+ * We needn't check indexes, because "unknown" has no opclasses.
+ */
+void
+old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster)
+{
+ int dbnum;
+ FILE *script = NULL;
+ bool found = false;
+ char output_path[MAXPGPATH];
+
+ prep_status("Checking for invalid \"unknown\" user columns");
+
+ snprintf(output_path, sizeof(output_path), "tables_using_unknown.txt");
+
+ for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+ {
+ PGresult *res;
+ bool db_used = false;
+ int ntups;
+ int rowno;
+ int i_nspname,
+ i_relname,
+ i_attname;
+ DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
+ PGconn *conn = connectToServer(cluster, active_db->db_name);
+
+ res = executeQueryOrDie(conn,
+ "SELECT n.nspname, c.relname, a.attname "
+ "FROM pg_catalog.pg_class c, "
+ " pg_catalog.pg_namespace n, "
+ " pg_catalog.pg_attribute a "
+ "WHERE c.oid = a.attrelid AND "
+ " NOT a.attisdropped AND "
+ " a.atttypid = 'pg_catalog.unknown'::pg_catalog.regtype AND "
+ " c.relkind IN ('r', 'c', 'm') AND "
+ " c.relnamespace = n.oid AND "
+ /* exclude possible orphaned temp tables */
+ " n.nspname !~ '^pg_temp_' AND "
+ " n.nspname !~ '^pg_toast_temp_' AND "
+ " n.nspname NOT IN ('pg_catalog', 'information_schema')");
+
+ ntups = PQntuples(res);
+ i_nspname = PQfnumber(res, "nspname");
+ i_relname = PQfnumber(res, "relname");
+ i_attname = PQfnumber(res, "attname");
+ for (rowno = 0; rowno < ntups; rowno++)
+ {
+ found = true;
+ if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
+ pg_fatal("could not open file \"%s\": %s\n", output_path,
+ strerror(errno));
+ if (!db_used)
+ {
+ fprintf(script, "Database: %s\n", active_db->db_name);
+ db_used = true;
+ }
+ fprintf(script, " %s.%s.%s\n",
+ PQgetvalue(res, rowno, i_nspname),
+ PQgetvalue(res, rowno, i_relname),
+ PQgetvalue(res, rowno, i_attname));
+ }
+
+ PQclear(res);
+
+ PQfinish(conn);
+ }
+
+ if (script)
+ fclose(script);
+
+ if (found)
+ {
+ pg_log(PG_REPORT, "fatal\n");
+ pg_fatal("Your installation contains the \"unknown\" data type in user tables. This\n"
+ "data type is no longer allowed in tables, so this cluster cannot currently\n"
+ "be upgraded. You can remove the problem tables and restart the upgrade.\n"
+ "A list of the problem columns is in the file:\n"
+ " %s\n\n", output_path);
+ }
+ else
+ check_ok();
+}
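When this check reports offending tables, one plausible way to clear them
before re-running pg_upgrade is to convert or drop the flagged columns on
the old cluster; a sketch, with hypothetical names as they would appear in
tables_using_unknown.txt:

    -- convert the column so the reloaded definition matches the data
    ALTER TABLE public.unknowntab ALTER COLUMN u TYPE text;
    -- or, if the column's contents are expendable:
    -- ALTER TABLE public.unknowntab DROP COLUMN u;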
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index a7e5c55ab4..17259409a7 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -29,7 +29,8 @@ extern Query *parse_analyze_varparams(RawStmt *parseTree, const char *sourceText
extern Query *parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
CommonTableExpr *parentCTE,
- bool locked_from_parent);
+ bool locked_from_parent,
+ bool resolve_unknowns);
extern Query *transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree);
extern Query *transformStmt(ParseState *pstate, Node *parseTree);
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index bc3eea9ba5..3a25d9598d 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -150,6 +150,9 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
* p_locked_from_parent: true if parent query level applies FOR UPDATE/SHARE
* to this subquery as a whole.
*
+ * p_resolve_unknowns: resolve unknown-type SELECT output columns as type TEXT
+ * (this is true by default).
+ *
* p_hasAggs, p_hasWindowFuncs, etc: true if we've found any of the indicated
* constructs in the query.
*
@@ -182,6 +185,8 @@ struct ParseState
List *p_locking_clause; /* raw FOR UPDATE/FOR SHARE info */
bool p_locked_from_parent; /* parent has marked this subquery
* with FOR UPDATE/FOR SHARE */
+ bool p_resolve_unknowns; /* resolve unknown-type SELECT outputs
+ * as type text */
/* Flags telling about things found in the query: */
bool p_hasAggs;
diff --git a/src/include/parser/parse_target.h b/src/include/parser/parse_target.h
index e035aacbf4..d06a235df0 100644
--- a/src/include/parser/parse_target.h
+++ b/src/include/parser/parse_target.h
@@ -21,6 +21,7 @@ extern List *transformTargetList(ParseState *pstate, List *targetlist,
ParseExprKind exprKind);
extern List *transformExpressionList(ParseState *pstate, List *exprlist,
ParseExprKind exprKind, bool allowDefault);
+extern void resolveTargetListUnknowns(ParseState *pstate, List *targetlist);
extern void markTargetListOrigins(ParseState *pstate, List *targetlist);
extern TargetEntry *transformTargetEntry(ParseState *pstate,
Node *node, Node *expr, ParseExprKind exprKind,
diff --git a/src/test/regress/expected/create_table.out b/src/test/regress/expected/create_table.out
index 6caa9c2407..36266f0a32 100644
--- a/src/test/regress/expected/create_table.out
+++ b/src/test/regress/expected/create_table.out
@@ -199,6 +199,14 @@ CREATE TABLE array_index_op_test (
CREATE TABLE testjsonb (
j jsonb
);
+CREATE TABLE unknowntab (
+ u unknown -- fail
+);
+ERROR: column "u" has pseudo-type unknown
+CREATE TYPE unknown_comptype AS (
+ u unknown -- fail
+);
+ERROR: column "u" has pseudo-type unknown
CREATE TABLE IF NOT EXISTS test_tsvector(
t text,
a tsvector
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index 096bfc30c9..ce0c8cedf8 100644
--- a/src/test/regress/expected/create_view.out
+++ b/src/test/regress/expected/create_view.out
@@ -288,6 +288,32 @@ SELECT relname, relkind, reloptions FROM pg_class
mysecview4 | v | {security_barrier=false}
(4 rows)
+-- Check that unknown literals are converted to "text" in CREATE VIEW,
+-- so that we don't end up with unknown-type columns.
+CREATE VIEW unspecified_types AS
+ SELECT 42 as i, 42.5 as num, 'foo' as u, 'foo'::unknown as u2, null as n;
+\d+ unspecified_types
+ View "testviewschm2.unspecified_types"
+ Column | Type | Collation | Nullable | Default | Storage | Description
+--------+---------+-----------+----------+---------+----------+-------------
+ i | integer | | | | plain |
+ num | numeric | | | | main |
+ u | text | | | | extended |
+ u2 | text | | | | extended |
+ n | text | | | | extended |
+View definition:
+ SELECT 42 AS i,
+ 42.5 AS num,
+ 'foo'::text AS u,
+ 'foo'::text AS u2,
+ NULL::text AS n;
+
+SELECT * FROM unspecified_types;
+ i | num | u | u2 | n
+----+------+-----+-----+---
+ 42 | 42.5 | foo | foo |
+(1 row)
+
-- This test checks that proper typmods are assigned in a multi-row VALUES
CREATE VIEW tt1 AS
SELECT * FROM (
diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out
index 7a2eaa0c4a..4ae44607a4 100644
--- a/src/test/regress/expected/matview.out
+++ b/src/test/regress/expected/matview.out
@@ -508,6 +508,33 @@ DETAIL: drop cascades to materialized view mvtest_mv_v
drop cascades to materialized view mvtest_mv_v_2
drop cascades to materialized view mvtest_mv_v_3
drop cascades to materialized view mvtest_mv_v_4
+-- Check that unknown literals are converted to "text" in CREATE MATVIEW,
+-- so that we don't end up with unknown-type columns.
+CREATE MATERIALIZED VIEW mv_unspecified_types AS
+ SELECT 42 as i, 42.5 as num, 'foo' as u, 'foo'::unknown as u2, null as n;
+\d+ mv_unspecified_types
+ Materialized view "public.mv_unspecified_types"
+ Column | Type | Collation | Nullable | Default | Storage | Stats target | Description
+--------+---------+-----------+----------+---------+----------+--------------+-------------
+ i | integer | | | | plain | |
+ num | numeric | | | | main | |
+ u | text | | | | extended | |
+ u2 | text | | | | extended | |
+ n | text | | | | extended | |
+View definition:
+ SELECT 42 AS i,
+ 42.5 AS num,
+ 'foo'::text AS u,
+ 'foo'::text AS u2,
+ NULL::text AS n;
+
+SELECT * FROM mv_unspecified_types;
+ i | num | u | u2 | n
+----+------+-----+-----+---
+ 42 | 42.5 | foo | foo |
+(1 row)
+
+DROP MATERIALIZED VIEW mv_unspecified_types;
-- make sure that create WITH NO DATA does not plan the query (bug #13907)
create materialized view mvtest_error as select 1/0 as x; -- fail
ERROR: division by zero
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index abd3217e86..47afdc335e 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -196,6 +196,37 @@ SELECT '' AS five, f1 AS "Correlated Field"
| 3
(5 rows)
+-- Unspecified-type literals in output columns should resolve as text
+SELECT *, pg_typeof(f1) FROM
+ (SELECT 'foo' AS f1 FROM generate_series(1,3)) ss ORDER BY 1;
+ f1 | pg_typeof
+-----+-----------
+ foo | text
+ foo | text
+ foo | text
+(3 rows)
+
+-- ... unless there's context to suggest differently
+explain verbose select '42' union all select '43';
+ QUERY PLAN
+-------------------------------------------------
+ Append (cost=0.00..0.04 rows=2 width=32)
+ -> Result (cost=0.00..0.01 rows=1 width=32)
+ Output: '42'::text
+ -> Result (cost=0.00..0.01 rows=1 width=32)
+ Output: '43'::text
+(5 rows)
+
+explain verbose select '42' union all select 43;
+ QUERY PLAN
+------------------------------------------------
+ Append (cost=0.00..0.04 rows=2 width=4)
+ -> Result (cost=0.00..0.01 rows=1 width=4)
+ Output: 42
+ -> Result (cost=0.00..0.01 rows=1 width=4)
+ Output: 43
+(5 rows)
+
--
-- Use some existing tables in the regression test
--
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 02fa08e932..3b7f689a98 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -133,9 +133,9 @@ SELECT * FROM t LIMIT 10;
-- Test behavior with an unknown-type literal in the WITH
WITH q AS (SELECT 'foo' AS x)
-SELECT x, x IS OF (unknown) as is_unknown FROM q;
- x | is_unknown
------+------------
+SELECT x, x IS OF (text) AS is_text FROM q;
+ x | is_text
+-----+---------
foo | t
(1 row)
@@ -144,7 +144,7 @@ WITH RECURSIVE t(n) AS (
UNION ALL
SELECT n || ' bar' FROM t WHERE length(n) < 20
)
-SELECT n, n IS OF (text) as is_text FROM t;
+SELECT n, n IS OF (text) AS is_text FROM t;
n | is_text
-------------------------+---------
foo | t
@@ -155,6 +155,18 @@ SELECT n, n IS OF (text) as is_text FROM t;
foo bar bar bar bar bar | t
(6 rows)
+-- In a perfect world, this would work and resolve the literal as int ...
+-- but for now, we have to be content with resolving to text too soon.
+WITH RECURSIVE t(n) AS (
+ SELECT '7'
+UNION ALL
+ SELECT n+1 FROM t WHERE n < 10
+)
+SELECT n, n IS OF (int) AS is_int FROM t;
+ERROR: operator does not exist: text + integer
+LINE 4: SELECT n+1 FROM t WHERE n < 10
+ ^
+HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts.
--
-- Some examples with a tree
--
diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source
index 30c2936f8d..957595c51e 100644
--- a/src/test/regress/output/create_function_1.source
+++ b/src/test/regress/output/create_function_1.source
@@ -59,7 +59,7 @@ CREATE FUNCTION test_atomic_ops()
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
AS 'SELECT ''not an integer'';';
ERROR: return type mismatch in function declared to return integer
-DETAIL: Actual return type is unknown.
+DETAIL: Actual return type is text.
CONTEXT: SQL function "test1"
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
AS 'not even SQL';
diff --git a/src/test/regress/sql/create_table.sql b/src/test/regress/sql/create_table.sql
index 8242e7328d..6314aa403f 100644
--- a/src/test/regress/sql/create_table.sql
+++ b/src/test/regress/sql/create_table.sql
@@ -236,6 +236,14 @@ CREATE TABLE testjsonb (
j jsonb
);
+CREATE TABLE unknowntab (
+ u unknown -- fail
+);
+
+CREATE TYPE unknown_comptype AS (
+ u unknown -- fail
+);
+
CREATE TABLE IF NOT EXISTS test_tsvector(
t text,
a tsvector
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
index 5fe8b94aae..c27f1034e1 100644
--- a/src/test/regress/sql/create_view.sql
+++ b/src/test/regress/sql/create_view.sql
@@ -224,6 +224,14 @@ SELECT relname, relkind, reloptions FROM pg_class
'mysecview3'::regclass, 'mysecview4'::regclass)
ORDER BY relname;
+-- Check that unknown literals are converted to "text" in CREATE VIEW,
+-- so that we don't end up with unknown-type columns.
+
+CREATE VIEW unspecified_types AS
+ SELECT 42 as i, 42.5 as num, 'foo' as u, 'foo'::unknown as u2, null as n;
+\d+ unspecified_types
+SELECT * FROM unspecified_types;
+
-- This test checks that proper typmods are assigned in a multi-row VALUES
CREATE VIEW tt1 AS
diff --git a/src/test/regress/sql/matview.sql b/src/test/regress/sql/matview.sql
index 65a743ced9..1164b4cea2 100644
--- a/src/test/regress/sql/matview.sql
+++ b/src/test/regress/sql/matview.sql
@@ -198,6 +198,14 @@ SELECT * FROM mvtest_mv_v_3;
SELECT * FROM mvtest_mv_v_4;
DROP TABLE mvtest_v CASCADE;
+-- Check that unknown literals are converted to "text" in CREATE MATVIEW,
+-- so that we don't end up with unknown-type columns.
+CREATE MATERIALIZED VIEW mv_unspecified_types AS
+ SELECT 42 as i, 42.5 as num, 'foo' as u, 'foo'::unknown as u2, null as n;
+\d+ mv_unspecified_types
+SELECT * FROM mv_unspecified_types;
+DROP MATERIALIZED VIEW mv_unspecified_types;
+
-- make sure that create WITH NO DATA does not plan the query (bug #13907)
create materialized view mvtest_error as select 1/0 as x; -- fail
create materialized view mvtest_error as select 1/0 as x with no data;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 08eb825c54..9c2a73d4d7 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -80,6 +80,16 @@ SELECT '' AS five, f1 AS "Correlated Field"
WHERE (f1, f2) IN (SELECT f2, CAST(f3 AS int4) FROM SUBSELECT_TBL
WHERE f3 IS NOT NULL);
+-- Unspecified-type literals in output columns should resolve as text
+
+SELECT *, pg_typeof(f1) FROM
+ (SELECT 'foo' AS f1 FROM generate_series(1,3)) ss ORDER BY 1;
+
+-- ... unless there's context to suggest differently
+
+explain verbose select '42' union all select '43';
+explain verbose select '42' union all select 43;
+
--
-- Use some existing tables in the regression test
--
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 7ee32bab8f..08ddc8bae0 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -69,14 +69,23 @@ SELECT * FROM t LIMIT 10;
-- Test behavior with an unknown-type literal in the WITH
WITH q AS (SELECT 'foo' AS x)
-SELECT x, x IS OF (unknown) as is_unknown FROM q;
+SELECT x, x IS OF (text) AS is_text FROM q;
WITH RECURSIVE t(n) AS (
SELECT 'foo'
UNION ALL
SELECT n || ' bar' FROM t WHERE length(n) < 20
)
-SELECT n, n IS OF (text) as is_text FROM t;
+SELECT n, n IS OF (text) AS is_text FROM t;
+
+-- In a perfect world, this would work and resolve the literal as int ...
+-- but for now, we have to be content with resolving to text too soon.
+WITH RECURSIVE t(n) AS (
+ SELECT '7'
+UNION ALL
+ SELECT n+1 FROM t WHERE n < 10
+)
+SELECT n, n IS OF (int) AS is_int FROM t;
--
-- Some examples with a tree
--
cgit v1.2.3
From d8d32d9a56a3cecfb14e8f47ebd50b780edffe60 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 25 Jan 2017 09:27:09 -0500
Subject: Make UNKNOWN into an actual pseudo-type.
Previously, type "unknown" was labeled as a base type in pg_type, which
perhaps had some sense to it because you were allowed to create tables with
unknown-type columns. But now that we don't allow that, it makes more
sense to label it a pseudo-type. This has the additional effects of
forbidding use of "unknown" as a domain base type, cast source or target
type, PL function argument or result type, or plpgsql local variable type;
all of which seem like good holes to plug.
Discussion: https://postgr.es/m/CAH2L28uwwbL9HUM-WR=hromW1Cvamkn7O-g8fPY2m=_7muJ0oA@mail.gmail.com
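Each of the newly plugged holes now fails up front rather than being
accepted; a quick SQL sketch (object names are illustrative, and the exact
error wording is not shown):

    CREATE DOMAIN d_unknown AS unknown;            -- rejected: domain base type
    CREATE CAST (unknown AS integer) WITH INOUT;   -- rejected: cast source type
    DO $$ DECLARE x unknown; BEGIN NULL; END $$;   -- rejected: plpgsql variable type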
---
doc/src/sgml/datatype.sgml | 13 ++++++++++++-
doc/src/sgml/ddl.sgml | 2 +-
doc/src/sgml/plhandler.sgml | 2 +-
doc/src/sgml/protocol.sgml | 2 +-
doc/src/sgml/queries.sgml | 2 +-
doc/src/sgml/ref/create_function.sgml | 6 +++---
doc/src/sgml/ref/create_type.sgml | 2 +-
src/backend/catalog/heap.c | 3 +--
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_type.h | 2 +-
src/test/regress/expected/type_sanity.out | 5 ++---
src/test/regress/sql/type_sanity.sql | 2 +-
12 files changed, 26 insertions(+), 17 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 3bc6854be6..9ef7b4a025 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4660,6 +4660,10 @@ SELECT * FROM pg_attribute
internal
+
+ unknown
+
+
opaque
@@ -4781,9 +4785,16 @@ SELECT * FROM pg_attribute
Indicates that a function returns no value.
+
+ unknown>
+ Identifies a not-yet-resolved type, e.g. of an undecorated
+ string literal.
+
+
opaque>
- An obsolete type name that formerly served all the above purposes.
+ An obsolete type name that formerly served many of the above
+ purposes.
diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index d7117cbc8f..aebe898466 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -2579,7 +2579,7 @@ WHERE c.altitude > 500 AND c.tableoid = p.oid;
Another way to get the same effect is to use the regclass>
- pseudo-type, which will print the table OID symbolically:
+ alias type, which will print the table OID symbolically:
SELECT c.tableoid::regclass, c.name, c.altitude
diff --git a/doc/src/sgml/plhandler.sgml b/doc/src/sgml/plhandler.sgml
index 0fc5d7b411..57a2a05ed2 100644
--- a/doc/src/sgml/plhandler.sgml
+++ b/doc/src/sgml/plhandler.sgml
@@ -26,7 +26,7 @@
language such as C, using the version-1 interface, and registered
with PostgreSQL as taking no arguments
and returning the type language_handler. This
- special pseudotype identifies the function as a call handler and
+ special pseudo-type identifies the function as a call handler and
prevents it from being called directly in SQL commands.
For more details on C language calling conventions and dynamic loading,
see .
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 028ef10d91..9f054added 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -668,7 +668,7 @@
number of parameter symbols ($>n>)
used in the query string. Another special case is that a parameter's
type can be specified as void> (that is, the OID of the
- void> pseudotype). This is meant to allow parameter symbols
+ void> pseudo-type). This is meant to allow parameter symbols
to be used for function parameters that are actually OUT parameters.
Ordinarily there is no context in which a void> parameter
could be used, but if such a parameter symbol appears in a function's
diff --git a/doc/src/sgml/queries.sgml b/doc/src/sgml/queries.sgml
index ef623d59bd..30792f45f1 100644
--- a/doc/src/sgml/queries.sgml
+++ b/doc/src/sgml/queries.sgml
@@ -762,7 +762,7 @@ SELECT * FROM vw_getfoo;
In some cases it is useful to define table functions that can
return different column sets depending on how they are invoked.
To support this, the table function can be declared as returning
- the pseudotype record>. When such a function is used in
+ the pseudo-type record>. When such a function is used in
a query, the expected row structure must be specified in the
query itself, so that the system can know how to parse and plan
the query. This syntax looks like:
diff --git a/doc/src/sgml/ref/create_function.sgml b/doc/src/sgml/ref/create_function.sgml
index 8108a43095..e7057789d3 100644
--- a/doc/src/sgml/ref/create_function.sgml
+++ b/doc/src/sgml/ref/create_function.sgml
@@ -160,8 +160,8 @@ CREATE [ OR REPLACE ] FUNCTION
Depending on the implementation language it might also be allowed
- to specify pseudotypes> such as cstring>.
- Pseudotypes indicate that the actual argument type is either
+ to specify pseudo-types> such as cstring>.
+ Pseudo-types indicate that the actual argument type is either
incompletely specified, or outside the set of ordinary SQL data types.
@@ -199,7 +199,7 @@ CREATE [ OR REPLACE ] FUNCTION
can be a base, composite, or domain type,
or can reference the type of a table column.
Depending on the implementation language it might also be allowed
- to specify pseudotypes> such as cstring>.
+ to specify pseudo-types> such as cstring>.
If the function is not supposed to return a value, specify
void> as the return type.
diff --git a/doc/src/sgml/ref/create_type.sgml b/doc/src/sgml/ref/create_type.sgml
index 5a09f1942a..7146c4a27b 100644
--- a/doc/src/sgml/ref/create_type.sgml
+++ b/doc/src/sgml/ref/create_type.sgml
@@ -824,7 +824,7 @@ CREATE TYPE name
In PostgreSQL versions before 7.3, it
was customary to avoid creating a shell type at all, by replacing the
functions' forward references to the type name with the placeholder
- pseudotype opaque>. The cstring> arguments and
+ pseudo-type opaque>. The cstring> arguments and
results also had to be declared as opaque> before 7.3. To
support loading of old dump files, CREATE TYPE> will
accept I/O functions declared using opaque>, but it will issue
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index d7a3513d29..7ce9115832 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -490,8 +490,7 @@ CheckAttributeType(const char *attname,
char att_typtype = get_typtype(atttypid);
Oid att_typelem;
- if (atttypid == UNKNOWNOID ||
- att_typtype == TYPTYPE_PSEUDO)
+ if (att_typtype == TYPTYPE_PSEUDO)
{
/*
* Refuse any attempt to create a pseudo-type column, except for a
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 79a9f108a6..417cfc36ec 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201701201
+#define CATALOG_VERSION_NO 201701251
#endif
diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h
index c2350f3867..6e4c65e6ad 100644
--- a/src/include/catalog/pg_type.h
+++ b/src/include/catalog/pg_type.h
@@ -418,7 +418,7 @@ DESCR("relative, limited-range time interval (Unix delta time)");
DATA(insert OID = 704 ( tinterval PGNSP PGUID 12 f b T f t \054 0 0 1025 tintervalin tintervalout tintervalrecv tintervalsend - - - i p f 0 -1 0 0 _null_ _null_ _null_ ));
DESCR("(abstime,abstime), time interval");
#define TINTERVALOID 704
-DATA(insert OID = 705 ( unknown PGNSP PGUID -2 f b X f t \054 0 0 0 unknownin unknownout unknownrecv unknownsend - - - c p f 0 -1 0 0 _null_ _null_ _null_ ));
+DATA(insert OID = 705 ( unknown PGNSP PGUID -2 f p X f t \054 0 0 0 unknownin unknownout unknownrecv unknownsend - - - c p f 0 -1 0 0 _null_ _null_ _null_ ));
DESCR("");
#define UNKNOWNOID 705
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index e5adfba631..312d290e73 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
-- Look for types that should have an array type according to their typtype,
-- but don't. We exclude composites here because we have not bothered to
-- make array types corresponding to the system catalogs' rowtypes.
--- NOTE: as of 9.1, this check finds pg_node_tree, smgr, and unknown.
+-- NOTE: as of v10, this check finds pg_node_tree and smgr.
SELECT p1.oid, p1.typname
FROM pg_type as p1
WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
@@ -71,8 +71,7 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%'
-----+--------------
194 | pg_node_tree
210 | smgr
- 705 | unknown
-(3 rows)
+(2 rows)
-- Make sure typarray points to a varlena array type of our own base
SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype,
diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql
index f7c5c9d5d4..0282f84d2e 100644
--- a/src/test/regress/sql/type_sanity.sql
+++ b/src/test/regress/sql/type_sanity.sql
@@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR
-- Look for types that should have an array type according to their typtype,
-- but don't. We exclude composites here because we have not bothered to
-- make array types corresponding to the system catalogs' rowtypes.
--- NOTE: as of 9.1, this check finds pg_node_tree, smgr, and unknown.
+-- NOTE: as of v10, this check finds pg_node_tree and smgr.
SELECT p1.oid, p1.typname
FROM pg_type as p1
--
cgit v1.2.3
From 87ecf2d14fa236e894267ef5e702ff08b8965f9d Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 25 Jan 2017 09:28:38 -0500
Subject: doc: Fix typo
---
doc/src/sgml/ref/alter_sequence.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/alter_sequence.sgml b/doc/src/sgml/ref/alter_sequence.sgml
index 307366526f..3b52e875e3 100644
--- a/doc/src/sgml/ref/alter_sequence.sgml
+++ b/doc/src/sgml/ref/alter_sequence.sgml
@@ -117,7 +117,7 @@ ALTER SEQUENCE [ IF EXISTS ] name S
The optional clause MAXVALUE maxvalue determines
the maximum value for the sequence. If NO
- MAXVALUE is specified, the defaults are
+ MAXVALUE is specified, the defaults of
263>-1 and -1 for ascending and descending
sequences, respectively, will be used. If neither option is
specified, the current maximum value will be maintained.
--
cgit v1.2.3
From 98cc5db27c97cccd19c4e67e8e623f2d260a5c35 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 25 Jan 2017 12:42:11 -0500
Subject: doc: Logical replication documentation improvements
From: Erik Rijkers
---
doc/src/sgml/logical-replication.sgml | 64 +++++++++++++++++------------------
1 file changed, 32 insertions(+), 32 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
index 9312c0c9a0..7b351f2727 100644
--- a/doc/src/sgml/logical-replication.sgml
+++ b/doc/src/sgml/logical-replication.sgml
@@ -24,7 +24,7 @@
- Logical replication sends the changes on the publisher to the subscriber as
+ Logical replication sends changes on the publisher to the subscriber as
they occur in real-time. The subscriber applies the data in the same order
as the publisher so that transactional consistency is guaranteed for
publications within a single subscription. This method of data replication
@@ -44,7 +44,7 @@
- Firing triggers for individual changes as they are incoming to
+ Firing triggers for individual changes as they arrive on the
subscriber.
@@ -81,19 +81,19 @@
instance and can be used as a publisher for other databases by defining its
own publications. When the subscriber is treated as read-only by
application, there will be no conflicts from a single subscription. On the
- other hand, if there are other writes done either by application or other
- subscribers to the same set of tables conflicts can arise.
+ other hand, if there are other writes done either by an application or by other
+ subscribers to the same set of tables, conflicts can arise.
Publication
- A publication object can be defined on any physical
+ A publication can be defined on any physical
replication master. The node where a publication is defined is referred to
as publisher. A publication is a set of changes
- generated from a group of tables, and might also be described as a change
- set or replication set. Each publication exists in only one database.
+ generated from a table or a group of tables, and might also be described as
+ a change set or replication set. Each publication exists in only one database.
@@ -105,9 +105,9 @@
- Publications can choose to limit the changes they produce to show
+ Publications can choose to limit the changes they produce to
any combination of INSERT, UPDATE, and
- DELETE in a similar way to the way triggers are fired by
+ DELETE, similar to how triggers are fired by
particular event types. If a table without a REPLICA
IDENTITY is added to a publication that
replicates UPDATE or DELETE
@@ -121,7 +121,7 @@
A publication is created using the
- command and may be later altered or dropped using corresponding commands.
+ command and may later be altered or dropped using corresponding commands.
@@ -139,9 +139,9 @@
A subscription is the downstream side of logical
replication. The node where a subscription is defined is referred to as
- the subscriber. Subscription defines the connection
+ the subscriber. A subscription defines the connection
to another database and set of publications (one or more) to which it wants
- to be subscribed.
+ to subscribe.
@@ -153,7 +153,7 @@
A subscriber node may have multiple subscriptions if desired. It is
possible to define multiple subscriptions between a single
- publisher-subscriber pair, in which case extra care must be taken to ensure
+ publisher-subscriber pair, in which case care must be taken to ensure
that the subscribed publication objects don't overlap.
@@ -163,9 +163,9 @@
- Subscriptions are not dumped by pg_dump by default but
-	can be requested using the command-line
-	option --subscriptions.
+ Subscriptions are not dumped by pg_dump by default, but
+	this can be requested using the command-line
+	option --include-subscriptions.
@@ -182,8 +182,8 @@
- The schema definitions are not replicated and the published tables must
- exist on the subsriber for replication to work. Only regular tables may be
+ The schema definitions are not replicated, and the published tables must
+ exist on the subscriber. Only regular tables may be
the target of replication. For example, you can't replicate to a view.
@@ -203,9 +203,9 @@
Conflicts
- The logical replication behaves similarly to normal DML operations in that
+ Logical replication behaves similarly to normal DML operations in that
the data will be updated even if it was changed locally on the subscriber
- node. If the incoming data violates any constraints the replication will
+ node. If incoming data violates any constraints the replication will
stop. This is referred to as a conflict. When
replicating UPDATE or DELETE
operations, missing data will not produce a conflict and such operations
@@ -224,8 +224,8 @@
transaction that conflicts with the existing data. The transaction can be
skipped by calling the
pg_replication_origin_advance() function with
- a node_name corresponding to the subscription name.
- The current position of origins can be seen in the
+ a node_name corresponding to the subscription name,
+ and a position. The current position of origins can be seen in the
pg_replication_origin_status system view.
@@ -246,8 +246,8 @@
Logical replication is built with an architecture similar to physical
streaming replication (see ). It is
- implemented by walsender and the apply
- processes. The walsender starts logical decoding (described
+ implemented by walsender and apply
+ processes. The walsender process starts logical decoding (described
in ) of the WAL and loads the standard
logical decoding plugin (pgoutput). The plugin transforms the changes read
from WAL to the logical replication protocol
@@ -255,7 +255,7 @@
according to the publication specification. The data is then continuously
transferred using the streaming replication protocol to the apply worker,
which maps the data to local tables and applies the individual changes as
- they are received in exact transactional order.
+ they are received, in correct transactional order.
@@ -270,9 +270,9 @@
Monitoring
- Because logical replication is based on similar architecture as
- physical streaming replication
- the monitoring on a publication node is very similar to monitoring of
+ Because logical replication is based on a similar architecture as
+ physical streaming replication,
+ the monitoring on a publication node is similar to monitoring of a
physical replication master
(see ).
@@ -295,8 +295,8 @@
Security
- Logical replication connections occur in the same way as physical streaming
- replication. It requires access to be specifically given using
+ Logical replication connections occur in the same way as with physical streaming
+ replication. It requires access to be explicitly given using
pg_hba.conf. The role used for the replication
connection must have the REPLICATION attribute. This
gives a role access to both logical and physical replication.
@@ -333,7 +333,7 @@
On the publisher side, wal_level must be set to
logical, and max_replication_slots
- has to be set to at least the number of subscriptions expected to connect.
+ must be set to at least the number of subscriptions expected to connect.
And max_wal_senders should be set to at least the same
as max_replication_slots plus the number of physical replicas
that are connected at the same time.
@@ -343,7 +343,7 @@
The subscriber also requires the max_replication_slots
to be set. In this case it should be set to at least the number of
subscriptions that will be added to the subscriber.
- max_logical_replication_workers has to be set to at
+ max_logical_replication_workers must be set to at
least the number of subscriptions. Additionally the
max_worker_processes may need to be adjusted to
accommodate for replication workers, at least
--
cgit v1.2.3
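Taken together, the revised chapter describes a workflow along these lines
(a minimal sketch; all names and the connection string are examples, and it
assumes wal_level = logical plus sufficient replication slots and workers on
both nodes, per the settings discussed above):

    -- on the publisher, for an existing table:
    CREATE PUBLICATION mypub FOR TABLE users;

    -- on the subscriber, where a matching table must already exist,
    -- since schema definitions are not replicated:
    CREATE SUBSCRIPTION mysub
        CONNECTION 'host=publisher.example.com dbname=app user=repl'
        PUBLICATION mypub;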
From 049ac809a790a9bde478e371da2b68d6b18c5df7 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 25 Jan 2017 12:49:10 -0500
Subject: doc: Fix typo
---
doc/src/sgml/catalogs.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 524180e011..086fafc694 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9319,7 +9319,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
temporary boolean
- True if this is temporary replication slot. Temporary slots are
+ True if this is a temporary replication slot. Temporary slots are
not saved to disk and are automatically dropped on error or when
the session has finished.
--
cgit v1.2.3
From 5a366b4ff4ceceb9793fcc13c3f097ee0d32c56d Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Wed, 25 Jan 2017 14:35:31 -0500
Subject: Fix typo: pg_statistics -> pg_statistic
---
doc/src/sgml/planstats.sgml | 2 +-
doc/src/sgml/release-7.4.sgml | 2 +-
doc/src/sgml/release-old.sgml | 4 ++--
src/backend/postmaster/autovacuum.c | 2 +-
src/backend/utils/cache/lsyscache.c | 2 +-
5 files changed, 6 insertions(+), 6 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml
index 1a482d37f4..b73c66bde2 100644
--- a/doc/src/sgml/planstats.sgml
+++ b/doc/src/sgml/planstats.sgml
@@ -94,7 +94,7 @@ EXPLAIN SELECT * FROM tenk1 WHERE unique1 < 1000;
and the entry in this case is scalarltsel.
The scalarltsel function retrieves the histogram for
unique1 from
- pg_statistics. For manual queries it is more
+ pg_statistic. For manual queries it is more
convenient to look in the simpler pg_stats
view:
diff --git a/doc/src/sgml/release-7.4.sgml b/doc/src/sgml/release-7.4.sgml
index e42be5b89d..649c82b17e 100644
--- a/doc/src/sgml/release-7.4.sgml
+++ b/doc/src/sgml/release-7.4.sgml
@@ -2614,7 +2614,7 @@ UPDATE pg_database SET datallowconn = false WHERE datname = 'template0';
-Fix pg_statistics> alignment bug that could crash optimizer
+Fix pg_statistic> alignment bug that could crash optimizer
See above for details about this problem.
Allow non-super users to update pg_settings>
Fix several optimizer bugs, most of which led to
diff --git a/doc/src/sgml/release-old.sgml b/doc/src/sgml/release-old.sgml
index cd9b3db35a..a480a1d484 100644
--- a/doc/src/sgml/release-old.sgml
+++ b/doc/src/sgml/release-old.sgml
@@ -1650,7 +1650,7 @@ operations on bytea columns (Joe)
Add variable db_user_namespace for database-local user names (Bruce)
SSL improvements (Bear Giles)
Make encryption of stored passwords the default (Bruce)
-Allow pg_statistics to be reset by calling pg_stat_reset() (Christopher)
+Allow pg_statistic to be reset by calling pg_stat_reset() (Christopher)
Add log_duration parameter (Bruce)
Rename debug_print_query to log_statement (Bruce)
Rename show_query_stats to show_statement_stats (Bruce)
@@ -3893,7 +3893,7 @@ Faster sorting by calling fewer functions (Tom)
Create system indexes to match all system caches (Bruce, Hiroshi)
Make system caches use system indexes (Bruce)
Make all system indexes unique (Bruce)
-Improve pg_statistics management for VACUUM speed improvement (Tom)
+Improve pg_statistic management for VACUUM speed improvement (Tom)
Flush backend cache less frequently (Tom, Hiroshi)
COPY now reuses previous memory allocation, improving performance (Tom)
Improve optimization cost estimation (Tom)
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 51821d3f55..0c5ffa086c 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2876,7 +2876,7 @@ relation_needs_vacanalyze(Oid relid,
*doanalyze = false;
}
- /* ANALYZE refuses to work with pg_statistics */
+ /* ANALYZE refuses to work with pg_statistic */
if (relid == StatisticRelationId)
*doanalyze = false;
}
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index aff88a555d..1b04c098d0 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -2832,7 +2832,7 @@ get_attavgwidth(Oid relid, AttrNumber attnum)
* that have been provided by a stats hook and didn't really come from
* pg_statistic.
*
- * statstuple: pg_statistics tuple to be examined.
+ * statstuple: pg_statistic tuple to be examined.
* atttype: type OID of attribute (can be InvalidOid if values == NULL).
* atttypmod: typmod of attribute (can be 0 if values == NULL).
* reqkind: STAKIND code for desired statistics slot kind.
--
cgit v1.2.3
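As the planstats.sgml hunk above notes, pg_stats is the more convenient place
for manual queries. A sketch using that example's table and column:

    -- pg_statistic holds the raw, superuser-only form of the same data
    SELECT n_distinct, histogram_bounds
    FROM pg_stats
    WHERE tablename = 'tenk1' AND attname = 'unique1';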
From cdcad6b788a8651af2e48bf7d4160ae591e1f4d7 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 26 Jan 2017 15:36:59 -0500
Subject: doc: Update privileges documentation
The CREATE privilege on databases now also enables creating
publications.
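A sketch of the documented rule, assuming a hypothetical database mydb and
role alice:

    GRANT CREATE ON DATABASE mydb TO alice;
    -- alice, connected to mydb, may now create publications as well as schemas:
    CREATE PUBLICATION alice_pub;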
---
doc/src/sgml/ref/grant.sgml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/grant.sgml b/doc/src/sgml/ref/grant.sgml
index cd8f3f50ba..d8ca39f869 100644
--- a/doc/src/sgml/ref/grant.sgml
+++ b/doc/src/sgml/ref/grant.sgml
@@ -279,7 +279,7 @@ GRANT role_name [, ...] TO
CREATE
- For databases, allows new schemas to be created within the database.
+ For databases, allows new schemas and publications to be created within the database.
For schemas, allows new objects to be created within the schema.
--
cgit v1.2.3
From 511ae628f31b4e791cd5c7836e46cb84dcf145fd Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 30 Jan 2017 16:37:15 -0500
Subject: Make psql reject attempts to set special variables to invalid values.
Previously, if the user set a special variable such as ECHO to an
unrecognized value, psql would bleat but store the new value anyway, and
then fall back to a default setting for the behavior controlled by the
variable. This was agreed to be a not particularly good idea. With
this patch, invalid values result in an error message and no change in
state.
(But this applies only to variables that affect psql's behavior; purely
informational variables such as ENCODING can still be set to random
values.)
To do this, modify the API for psql's assign-hook functions so that they
can return an OK/not OK result, and give them the responsibility for
printing error messages when they reject a value. Adjust the APIs for
ParseVariableBool and ParseVariableNum to support the new behavior
conveniently.
In passing, document the variable VERSION, which had somehow escaped that.
And improve the quite-inadequate commenting in psql/variables.c.
Daniel Vérité, reviewed by Rahila Syed, some further tweaking by me
Discussion: https://postgr.es/m/7356e741-fa59-4146-a8eb-cf95fd6b21fb@mm
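For illustration, the new rejection behavior looks like this in a session (the
values are invented; the messages follow the regression output added below):

    \set ECHO junk
    unrecognized value "junk" for "ECHO"
    Available values are: none, errors, queries, all.
    \set FETCH_COUNT foo
    invalid value "foo" for "FETCH_COUNT": integer expected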
---
doc/src/sgml/ref/psql-ref.sgml | 33 ++++--
src/bin/psql/command.c | 82 +++++++------
src/bin/psql/common.c | 1 -
src/bin/psql/input.c | 2 +-
src/bin/psql/mainloop.c | 2 +-
src/bin/psql/startup.c | 121 ++++++++++++--------
src/bin/psql/variables.c | 228 ++++++++++++++++++++++++++-----------
src/bin/psql/variables.h | 50 +++++---
src/test/regress/expected/psql.out | 11 +-
src/test/regress/sql/psql.sql | 8 ++
10 files changed, 356 insertions(+), 182 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 640fe12bbf..4e51e90906 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -3078,10 +3078,8 @@ bar
by psql. They represent certain option
settings that can be changed at run time by altering the value of
the variable, or in some cases represent changeable state of
- psql. Although
- you can use these variables for other purposes, this is not
- recommended, as the program behavior might grow really strange
- really quickly. By convention, all specially treated variables' names
+ psql.
+ By convention, all specially treated variables' names
consist of all upper-case ASCII letters (and possibly digits and
underscores). To ensure maximum compatibility in the future, avoid
using such variable names for your own purposes. A list of all specially
@@ -3170,12 +3168,11 @@ bar
start-up, use the switch
-a
. If set to
queries,
psql prints each query to standard output
- as it is sent to the server. The switch for this is
+ as it is sent to the server. The switch to select this behavior is
-e
. If set to errors, then only
failed queries are displayed on standard error output. The switch
- for this is
-b
. If unset, or if set to
- none (or any other value than those above) then
- no queries are displayed.
+ for this behavior is
-b
. If unset, or if set to
+ none, then no queries are displayed.
@@ -3201,6 +3198,9 @@ bar
The current client character set encoding.
+ This is set every time you connect to a database (including
+ program start-up), and when you change the encoding
+ with \encoding>, but it can be unset.
@@ -3241,9 +3241,8 @@ bar
list. If set to a value of ignoredups, lines
matching the previous history line are not entered. A value of
ignoreboth combines the two options. If
- unset, or if set to none (or any other value
- than those above), all lines read in interactive mode are
- saved on the history list.
+ unset, or if set to none (the default), all lines
+ read in interactive mode are saved on the history list.
@@ -3312,7 +3311,7 @@ bar
to an interactive session of psql
will terminate the application. If set to a numeric value,
that many EOF> characters are ignored before the
- application terminates. If the variable is set but has no
+ application terminates. If the variable is set but not to a
numeric value, the default is 10.
@@ -3477,6 +3476,16 @@ bar
+
+ VERSION
+
+
+ This variable is set at program start-up to
+ reflect psql>'s version. It can be unset or changed.
+
+
+
+
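A short psql sketch of two variables touched by this hunk (hypothetical
session):

    \echo :VERSION
    -- prints the client version string set at start-up; exact contents are build-dependent
    \set IGNOREEOF xyzzy
    -- a non-numeric setting falls back to the documented default of 10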
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index 0c164a339c..f17f610717 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -248,31 +248,37 @@ exec_command(const char *cmd,
*opt2,
*opt3,
*opt4;
- enum trivalue reuse_previous;
+ enum trivalue reuse_previous = TRI_DEFAULT;
opt1 = read_connect_arg(scan_state);
if (opt1 != NULL && strncmp(opt1, prefix, sizeof(prefix) - 1) == 0)
{
- reuse_previous =
- ParseVariableBool(opt1 + sizeof(prefix) - 1, prefix) ?
- TRI_YES : TRI_NO;
+ bool on_off;
- free(opt1);
- opt1 = read_connect_arg(scan_state);
+ success = ParseVariableBool(opt1 + sizeof(prefix) - 1,
+ "-reuse-previous",
+ &on_off);
+ if (success)
+ {
+ reuse_previous = on_off ? TRI_YES : TRI_NO;
+ free(opt1);
+ opt1 = read_connect_arg(scan_state);
+ }
}
- else
- reuse_previous = TRI_DEFAULT;
- opt2 = read_connect_arg(scan_state);
- opt3 = read_connect_arg(scan_state);
- opt4 = read_connect_arg(scan_state);
+ if (success) /* give up if reuse_previous was invalid */
+ {
+ opt2 = read_connect_arg(scan_state);
+ opt3 = read_connect_arg(scan_state);
+ opt4 = read_connect_arg(scan_state);
- success = do_connect(reuse_previous, opt1, opt2, opt3, opt4);
+ success = do_connect(reuse_previous, opt1, opt2, opt3, opt4);
+ free(opt2);
+ free(opt3);
+ free(opt4);
+ }
free(opt1);
- free(opt2);
- free(opt3);
- free(opt4);
}
/* \cd */
@@ -1208,10 +1214,7 @@ exec_command(const char *cmd,
if (result &&
!SetVariable(pset.vars, opt, result))
- {
- psql_error("\\%s: error while setting variable\n", cmd);
success = false;
- }
if (result)
free(result);
@@ -1325,10 +1328,8 @@ exec_command(const char *cmd,
}
if (!SetVariable(pset.vars, opt0, newval))
- {
- psql_error("\\%s: error while setting variable\n", cmd);
success = false;
- }
+
free(newval);
}
free(opt0);
@@ -1564,7 +1565,7 @@ exec_command(const char *cmd,
OT_NORMAL, NULL, false);
if (opt)
- pset.timing = ParseVariableBool(opt, "\\timing");
+ success = ParseVariableBool(opt, "\\timing", &pset.timing);
else
pset.timing = !pset.timing;
if (!pset.quiet)
@@ -1589,10 +1590,8 @@ exec_command(const char *cmd,
success = false;
}
else if (!SetVariable(pset.vars, opt, NULL))
- {
- psql_error("\\%s: error while setting variable\n", cmd);
success = false;
- }
+
free(opt);
}
@@ -2593,7 +2592,6 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
psql_error("\\pset: allowed formats are unaligned, aligned, wrapped, html, asciidoc, latex, latex-longtable, troff-ms\n");
return false;
}
-
}
/* set table line style */
@@ -2612,7 +2610,6 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
psql_error("\\pset: allowed line styles are ascii, old-ascii, unicode\n");
return false;
}
-
}
/* set unicode border line style */
@@ -2665,7 +2662,6 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
{
if (value)
popt->topt.border = atoi(value);
-
}
/* set expanded/vertical mode */
@@ -2676,7 +2672,17 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
if (value && pg_strcasecmp(value, "auto") == 0)
popt->topt.expanded = 2;
else if (value)
- popt->topt.expanded = ParseVariableBool(value, param);
+ {
+ bool on_off;
+
+ if (ParseVariableBool(value, NULL, &on_off))
+ popt->topt.expanded = on_off ? 1 : 0;
+ else
+ {
+ PsqlVarEnumError(param, value, "on, off, auto");
+ return false;
+ }
+ }
else
popt->topt.expanded = !popt->topt.expanded;
}
@@ -2685,7 +2691,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
else if (strcmp(param, "numericlocale") == 0)
{
if (value)
- popt->topt.numericLocale = ParseVariableBool(value, param);
+ return ParseVariableBool(value, param, &popt->topt.numericLocale);
else
popt->topt.numericLocale = !popt->topt.numericLocale;
}
@@ -2740,7 +2746,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
else if (strcmp(param, "t") == 0 || strcmp(param, "tuples_only") == 0)
{
if (value)
- popt->topt.tuples_only = ParseVariableBool(value, param);
+ return ParseVariableBool(value, param, &popt->topt.tuples_only);
else
popt->topt.tuples_only = !popt->topt.tuples_only;
}
@@ -2772,10 +2778,14 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
popt->topt.pager = 2;
else if (value)
{
- if (ParseVariableBool(value, param))
- popt->topt.pager = 1;
- else
- popt->topt.pager = 0;
+ bool on_off;
+
+ if (!ParseVariableBool(value, NULL, &on_off))
+ {
+ PsqlVarEnumError(param, value, "on, off, always");
+ return false;
+ }
+ popt->topt.pager = on_off ? 1 : 0;
}
else if (popt->topt.pager == 1)
popt->topt.pager = 0;
@@ -2794,7 +2804,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
else if (strcmp(param, "footer") == 0)
{
if (value)
- popt->topt.default_footer = ParseVariableBool(value, param);
+ return ParseVariableBool(value, param, &popt->topt.default_footer);
else
popt->topt.default_footer = !popt->topt.default_footer;
}
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index e1b04de013..6e3acdc416 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -841,7 +841,6 @@ StoreQueryTuple(const PGresult *result)
if (!SetVariable(pset.vars, varname, value))
{
- psql_error("could not set variable \"%s\"\n", varname);
free(varname);
success = false;
break;
diff --git a/src/bin/psql/input.c b/src/bin/psql/input.c
index 972bea4cbf..3e3e97ad0d 100644
--- a/src/bin/psql/input.c
+++ b/src/bin/psql/input.c
@@ -541,7 +541,7 @@ finishInput(void)
{
int hist_size;
- hist_size = GetVariableNum(pset.vars, "HISTSIZE", 500, -1, true);
+ hist_size = GetVariableNum(pset.vars, "HISTSIZE", 500, -1);
(void) saveHistory(psql_history, hist_size);
free(psql_history);
psql_history = NULL;
diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c
index bb306a4327..dc25b4babc 100644
--- a/src/bin/psql/mainloop.c
+++ b/src/bin/psql/mainloop.c
@@ -162,7 +162,7 @@ MainLoop(FILE *source)
/* This tries to mimic bash's IGNOREEOF feature. */
count_eof++;
- if (count_eof < GetVariableNum(pset.vars, "IGNOREEOF", 0, 10, false))
+ if (count_eof < GetVariableNum(pset.vars, "IGNOREEOF", 0, 10))
{
if (!pset.quiet)
printf(_("Use \"\\q\" to leave %s.\n"), pset.progname);
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index 85aac4a165..0574b5bdfb 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -588,11 +588,7 @@ parse_psql_options(int argc, char *argv[], struct adhoc_opts * options)
{
*equal_loc = '\0';
if (!SetVariable(pset.vars, value, equal_loc + 1))
- {
- fprintf(stderr, _("%s: could not set variable \"%s\"\n"),
- pset.progname, value);
exit(EXIT_FAILURE);
- }
}
free(value);
@@ -786,43 +782,47 @@ showVersion(void)
* This isn't an amazingly good place for them, but neither is anywhere else.
*/
-static void
+static bool
autocommit_hook(const char *newval)
{
- pset.autocommit = ParseVariableBool(newval, "AUTOCOMMIT");
+ return ParseVariableBool(newval, "AUTOCOMMIT", &pset.autocommit);
}
-static void
+static bool
on_error_stop_hook(const char *newval)
{
- pset.on_error_stop = ParseVariableBool(newval, "ON_ERROR_STOP");
+ return ParseVariableBool(newval, "ON_ERROR_STOP", &pset.on_error_stop);
}
-static void
+static bool
quiet_hook(const char *newval)
{
- pset.quiet = ParseVariableBool(newval, "QUIET");
+ return ParseVariableBool(newval, "QUIET", &pset.quiet);
}
-static void
+static bool
singleline_hook(const char *newval)
{
- pset.singleline = ParseVariableBool(newval, "SINGLELINE");
+ return ParseVariableBool(newval, "SINGLELINE", &pset.singleline);
}
-static void
+static bool
singlestep_hook(const char *newval)
{
- pset.singlestep = ParseVariableBool(newval, "SINGLESTEP");
+ return ParseVariableBool(newval, "SINGLESTEP", &pset.singlestep);
}
-static void
+static bool
fetch_count_hook(const char *newval)
{
- pset.fetch_count = ParseVariableNum(newval, -1, -1, false);
+ if (newval == NULL)
+ pset.fetch_count = -1; /* default value */
+ else if (!ParseVariableNum(newval, "FETCH_COUNT", &pset.fetch_count))
+ return false;
+ return true;
}
-static void
+static bool
echo_hook(const char *newval)
{
if (newval == NULL)
@@ -837,39 +837,57 @@ echo_hook(const char *newval)
pset.echo = PSQL_ECHO_NONE;
else
{
- psql_error("unrecognized value \"%s\" for \"%s\"; assuming \"%s\"\n",
- newval, "ECHO", "none");
- pset.echo = PSQL_ECHO_NONE;
+ PsqlVarEnumError("ECHO", newval, "none, errors, queries, all");
+ return false;
}
+ return true;
}
-static void
+static bool
echo_hidden_hook(const char *newval)
{
if (newval == NULL)
pset.echo_hidden = PSQL_ECHO_HIDDEN_OFF;
else if (pg_strcasecmp(newval, "noexec") == 0)
pset.echo_hidden = PSQL_ECHO_HIDDEN_NOEXEC;
- else if (ParseVariableBool(newval, "ECHO_HIDDEN"))
- pset.echo_hidden = PSQL_ECHO_HIDDEN_ON;
- else /* ParseVariableBool printed msg if needed */
- pset.echo_hidden = PSQL_ECHO_HIDDEN_OFF;
+ else
+ {
+ bool on_off;
+
+ if (ParseVariableBool(newval, NULL, &on_off))
+ pset.echo_hidden = on_off ? PSQL_ECHO_HIDDEN_ON : PSQL_ECHO_HIDDEN_OFF;
+ else
+ {
+ PsqlVarEnumError("ECHO_HIDDEN", newval, "on, off, noexec");
+ return false;
+ }
+ }
+ return true;
}
-static void
+static bool
on_error_rollback_hook(const char *newval)
{
if (newval == NULL)
pset.on_error_rollback = PSQL_ERROR_ROLLBACK_OFF;
else if (pg_strcasecmp(newval, "interactive") == 0)
pset.on_error_rollback = PSQL_ERROR_ROLLBACK_INTERACTIVE;
- else if (ParseVariableBool(newval, "ON_ERROR_ROLLBACK"))
- pset.on_error_rollback = PSQL_ERROR_ROLLBACK_ON;
- else /* ParseVariableBool printed msg if needed */
- pset.on_error_rollback = PSQL_ERROR_ROLLBACK_OFF;
+ else
+ {
+ bool on_off;
+
+ if (ParseVariableBool(newval, NULL, &on_off))
+ pset.on_error_rollback = on_off ? PSQL_ERROR_ROLLBACK_ON : PSQL_ERROR_ROLLBACK_OFF;
+ else
+ {
+ PsqlVarEnumError("ON_ERROR_ROLLBACK", newval, "on, off, interactive");
+ return false;
+ }
+ }
+ return true;
}
-static void
+static bool
comp_keyword_case_hook(const char *newval)
{
if (newval == NULL)
@@ -884,13 +902,14 @@ comp_keyword_case_hook(const char *newval)
pset.comp_case = PSQL_COMP_CASE_LOWER;
else
{
- psql_error("unrecognized value \"%s\" for \"%s\"; assuming \"%s\"\n",
- newval, "COMP_KEYWORD_CASE", "preserve-upper");
- pset.comp_case = PSQL_COMP_CASE_PRESERVE_UPPER;
+ PsqlVarEnumError("COMP_KEYWORD_CASE", newval,
+ "lower, upper, preserve-lower, preserve-upper");
+ return false;
}
+ return true;
}
-static void
+static bool
histcontrol_hook(const char *newval)
{
if (newval == NULL)
@@ -905,31 +924,35 @@ histcontrol_hook(const char *newval)
pset.histcontrol = hctl_none;
else
{
- psql_error("unrecognized value \"%s\" for \"%s\"; assuming \"%s\"\n",
- newval, "HISTCONTROL", "none");
- pset.histcontrol = hctl_none;
+ PsqlVarEnumError("HISTCONTROL", newval,
+ "none, ignorespace, ignoredups, ignoreboth");
+ return false;
}
+ return true;
}
-static void
+static bool
prompt1_hook(const char *newval)
{
pset.prompt1 = newval ? newval : "";
+ return true;
}
-static void
+static bool
prompt2_hook(const char *newval)
{
pset.prompt2 = newval ? newval : "";
+ return true;
}
-static void
+static bool
prompt3_hook(const char *newval)
{
pset.prompt3 = newval ? newval : "";
+ return true;
}
-static void
+static bool
verbosity_hook(const char *newval)
{
if (newval == NULL)
@@ -942,16 +965,16 @@ verbosity_hook(const char *newval)
pset.verbosity = PQERRORS_VERBOSE;
else
{
- psql_error("unrecognized value \"%s\" for \"%s\"; assuming \"%s\"\n",
- newval, "VERBOSITY", "default");
- pset.verbosity = PQERRORS_DEFAULT;
+ PsqlVarEnumError("VERBOSITY", newval, "default, terse, verbose");
+ return false;
}
if (pset.db)
PQsetErrorVerbosity(pset.db, pset.verbosity);
+ return true;
}
-static void
+static bool
show_context_hook(const char *newval)
{
if (newval == NULL)
@@ -964,13 +987,13 @@ show_context_hook(const char *newval)
pset.show_context = PQSHOW_CONTEXT_ALWAYS;
else
{
- psql_error("unrecognized value \"%s\" for \"%s\"; assuming \"%s\"\n",
- newval, "SHOW_CONTEXT", "errors");
- pset.show_context = PQSHOW_CONTEXT_ERRORS;
+ PsqlVarEnumError("SHOW_CONTEXT", newval, "never, errors, always");
+ return false;
}
if (pset.db)
PQsetErrorContextVisibility(pset.db, pset.show_context);
+ return true;
}
diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c
index 2669572aa7..91e4ae8095 100644
--- a/src/bin/psql/variables.c
+++ b/src/bin/psql/variables.c
@@ -58,6 +58,11 @@ CreateVariableSpace(void)
return ptr;
}
+/*
+ * Get string value of variable, or NULL if it's not defined.
+ *
+ * Note: result is valid until variable is next assigned to.
+ */
const char *
GetVariable(VariableSpace space, const char *name)
{
@@ -79,94 +84,121 @@ GetVariable(VariableSpace space, const char *name)
}
/*
- * Try to interpret "value" as boolean value.
+ * Try to interpret "value" as a boolean value, and if successful,
+ * store it in *result. Otherwise don't clobber *result.
*
* Valid values are: true, false, yes, no, on, off, 1, 0; as well as unique
* prefixes thereof.
*
* "name" is the name of the variable we're assigning to, to use in error
* report if any. Pass name == NULL to suppress the error report.
+ *
+ * Return true when "value" is syntactically valid, false otherwise.
*/
bool
-ParseVariableBool(const char *value, const char *name)
+ParseVariableBool(const char *value, const char *name, bool *result)
{
size_t len;
+ bool valid = true;
if (value == NULL)
- return false; /* not set -> assume "off" */
+ {
+ *result = false; /* not set -> assume "off" */
+ return valid;
+ }
len = strlen(value);
- if (pg_strncasecmp(value, "true", len) == 0)
- return true;
- else if (pg_strncasecmp(value, "false", len) == 0)
- return false;
- else if (pg_strncasecmp(value, "yes", len) == 0)
- return true;
- else if (pg_strncasecmp(value, "no", len) == 0)
- return false;
+ if (len > 0 && pg_strncasecmp(value, "true", len) == 0)
+ *result = true;
+ else if (len > 0 && pg_strncasecmp(value, "false", len) == 0)
+ *result = false;
+ else if (len > 0 && pg_strncasecmp(value, "yes", len) == 0)
+ *result = true;
+ else if (len > 0 && pg_strncasecmp(value, "no", len) == 0)
+ *result = false;
/* 'o' is not unique enough */
else if (pg_strncasecmp(value, "on", (len > 2 ? len : 2)) == 0)
- return true;
+ *result = true;
else if (pg_strncasecmp(value, "off", (len > 2 ? len : 2)) == 0)
- return false;
+ *result = false;
else if (pg_strcasecmp(value, "1") == 0)
- return true;
+ *result = true;
else if (pg_strcasecmp(value, "0") == 0)
- return false;
+ *result = false;
else
{
- /* NULL is treated as false, so a non-matching value is 'true' */
+ /* string is not recognized; don't clobber *result */
if (name)
- psql_error("unrecognized value \"%s\" for \"%s\"; assuming \"%s\"\n",
- value, name, "on");
- return true;
+ psql_error("unrecognized value \"%s\" for \"%s\": boolean expected\n",
+ value, name);
+ valid = false;
}
+ return valid;
}
-
/*
- * Read numeric variable, or defaultval if it is not set, or faultval if its
- * value is not a valid numeric string. If allowtrail is false, this will
- * include the case where there are trailing characters after the number.
+ * Try to interpret "value" as an integer value, and if successful,
+ * store it in *result. Otherwise don't clobber *result.
+ *
+ * "name" is the name of the variable we're assigning to, to use in error
+ * report if any. Pass name == NULL to suppress the error report.
+ *
+ * Return true when "value" is syntactically valid, false otherwise.
*/
-int
-ParseVariableNum(const char *val,
- int defaultval,
- int faultval,
- bool allowtrail)
+bool
+ParseVariableNum(const char *value, const char *name, int *result)
{
- int result;
+ char *end;
+ long numval;
- if (!val)
- result = defaultval;
- else if (!val[0])
- result = faultval;
+ if (value == NULL)
+ return false;
+ errno = 0;
+ numval = strtol(value, &end, 0);
+ if (errno == 0 && *end == '\0' && end != value && numval == (int) numval)
+ {
+ *result = (int) numval;
+ return true;
+ }
else
{
- char *end;
-
- result = strtol(val, &end, 0);
- if (!allowtrail && *end)
- result = faultval;
+ /* string is not recognized; don't clobber *result */
+ if (name)
+ psql_error("invalid value \"%s\" for \"%s\": integer expected\n",
+ value, name);
+ return false;
}
-
- return result;
}
+/*
+ * Read integer value of the numeric variable named "name".
+ *
+ * Return defaultval if it is not set, or faultval if its value is not a
+ * valid integer. (No error message is issued.)
+ */
int
GetVariableNum(VariableSpace space,
const char *name,
int defaultval,
- int faultval,
- bool allowtrail)
+ int faultval)
{
const char *val;
+ int result;
val = GetVariable(space, name);
- return ParseVariableNum(val, defaultval, faultval, allowtrail);
+ if (!val)
+ return defaultval;
+
+ if (ParseVariableNum(val, NULL, &result))
+ return result;
+ else
+ return faultval;
}
+/*
+ * Print values of all variables.
+ */
void
PrintVariables(VariableSpace space)
{
@@ -184,17 +216,28 @@ PrintVariables(VariableSpace space)
}
}
+/*
+ * Set the variable named "name" to value "value",
+ * or delete it if "value" is NULL.
+ *
+ * Returns true if successful, false if not; in the latter case a suitable
+ * error message has been printed, except for the unexpected case of
+ * space or name being NULL.
+ */
bool
SetVariable(VariableSpace space, const char *name, const char *value)
{
struct _variable *current,
*previous;
- if (!space)
+ if (!space || !name)
return false;
if (!valid_variable_name(name))
+ {
+ psql_error("invalid variable name: \"%s\"\n", name);
return false;
+ }
if (!value)
return DeleteVariable(space, name);
@@ -205,13 +248,30 @@ SetVariable(VariableSpace space, const char *name, const char *value)
{
if (strcmp(current->name, name) == 0)
{
- /* found entry, so update */
- if (current->value)
- free(current->value);
- current->value = pg_strdup(value);
+ /*
+ * Found entry, so update, unless hook returns false. The hook
+ * may need the passed value to have the same lifespan as the
+ * variable, so allocate it right away, even though we'll have to
+ * free it again if the hook returns false.
+ */
+ char *new_value = pg_strdup(value);
+ bool confirmed;
+
if (current->assign_hook)
- (*current->assign_hook) (current->value);
- return true;
+ confirmed = (*current->assign_hook) (new_value);
+ else
+ confirmed = true;
+
+ if (confirmed)
+ {
+ if (current->value)
+ pg_free(current->value);
+ current->value = new_value;
+ }
+ else
+ pg_free(new_value); /* current->value is left unchanged */
+
+ return confirmed;
}
}
@@ -226,19 +286,29 @@ SetVariable(VariableSpace space, const char *name, const char *value)
}
/*
- * This both sets a hook function, and calls it on the current value (if any)
+ * Attach an assign hook function to the named variable.
+ *
+ * If the variable doesn't already exist, create it with value NULL,
+ * just so we have a place to store the hook function. (Externally,
+ * this isn't different from it not being defined.)
+ *
+ * The hook is immediately called on the variable's current value. This is
+ * meant to let it update any derived psql state. If the hook doesn't like
+ * the current value, it will print a message to that effect, but we'll ignore
+ * it. Generally we do not expect any such failure here, because this should
+ * get called before any user-supplied value is assigned.
*/
-bool
+void
SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook hook)
{
struct _variable *current,
*previous;
- if (!space)
- return false;
+ if (!space || !name)
+ return;
if (!valid_variable_name(name))
- return false;
+ return;
for (previous = space, current = space->next;
current;
@@ -248,8 +318,8 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook
{
/* found entry, so update */
current->assign_hook = hook;
- (*hook) (current->value);
- return true;
+ (void) (*hook) (current->value);
+ return;
}
}
@@ -260,16 +330,24 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook
current->assign_hook = hook;
current->next = NULL;
previous->next = current;
- (*hook) (NULL);
- return true;
+ (void) (*hook) (NULL);
}
+/*
+ * Convenience function to set a variable's value to "on".
+ */
bool
SetVariableBool(VariableSpace space, const char *name)
{
return SetVariable(space, name, "on");
}
+/*
+ * Attempt to delete variable.
+ *
+ * If unsuccessful, print a message and return "false".
+ * Deleting a nonexistent variable is not an error.
+ */
bool
DeleteVariable(VariableSpace space, const char *name)
{
@@ -277,7 +355,7 @@ DeleteVariable(VariableSpace space, const char *name)
*previous;
if (!space)
- return false;
+ return true;
for (previous = space, current = space->next;
current;
@@ -285,14 +363,21 @@ DeleteVariable(VariableSpace space, const char *name)
{
if (strcmp(current->name, name) == 0)
{
- if (current->value)
- free(current->value);
- current->value = NULL;
- /* Physically delete only if no hook function to remember */
if (current->assign_hook)
- (*current->assign_hook) (NULL);
+ {
+ /* Allow deletion only if hook is okay with NULL value */
+ if (!(*current->assign_hook) (NULL))
+ return false; /* message printed by hook */
+ if (current->value)
+ free(current->value);
+ current->value = NULL;
+ /* Don't delete entry, or we'd forget the hook function */
+ }
else
{
+ /* We can delete the entry as well as its value */
+ if (current->value)
+ free(current->value);
previous->next = current->next;
free(current->name);
free(current);
@@ -303,3 +388,16 @@ DeleteVariable(VariableSpace space, const char *name)
return true;
}
+
+/*
+ * Emit error with suggestions for variables or commands
+ * accepting enum-style arguments.
+ * This function just exists to standardize the wording.
+ * suggestions should follow the format "fee, fi, fo, fum".
+ */
+void
+PsqlVarEnumError(const char *name, const char *value, const char *suggestions)
+{
+ psql_error("unrecognized value \"%s\" for \"%s\"\nAvailable values are: %s.\n",
+ value, name, suggestions);
+}
diff --git a/src/bin/psql/variables.h b/src/bin/psql/variables.h
index d235b1798e..274b4af553 100644
--- a/src/bin/psql/variables.h
+++ b/src/bin/psql/variables.h
@@ -3,25 +3,39 @@
*
* Copyright (c) 2000-2017, PostgreSQL Global Development Group
*
+ * This implements a sort of variable repository. One could also think of it
+ * as a cheap version of an associative array. Each variable has a string
+ * name and a string value. The value can't be NULL, or more precisely
+ * that's not distinguishable from the variable being unset.
+ *
* src/bin/psql/variables.h
*/
#ifndef VARIABLES_H
#define VARIABLES_H
/*
- * This implements a sort of variable repository. One could also think of it
- * as a cheap version of an associative array. In each one of these
- * datastructures you can store name/value pairs. There can also be an
- * "assign hook" function that is called whenever the variable's value is
- * changed.
+ * Variables can be given "assign hook" functions. The assign hook can
+ * prevent invalid values from being assigned, and can update internal C
+ * variables to keep them in sync with the variable's current value.
+ *
+ * A hook function is called before any attempted assignment, with the
+ * proposed new value of the variable (or with NULL, if an \unset is being
+ * attempted). If it returns false, the assignment doesn't occur --- it
+ * should print an error message with psql_error() to tell the user why.
*
- * An "unset" operation causes the hook to be called with newval == NULL.
+ * When a hook function is installed with SetVariableAssignHook(), it is
+ * called with the variable's current value (or with NULL, if it wasn't set
+ * yet). But its return value is ignored in this case. The hook should be
+ * set before any possibly-invalid value can be assigned.
+ */
+typedef bool (*VariableAssignHook) (const char *newval);
+
+/*
+ * Data structure representing one variable.
*
* Note: if value == NULL then the variable is logically unset, but we are
* keeping the struct around so as not to forget about its hook function.
*/
-typedef void (*VariableAssignHook) (const char *newval);
-
struct _variable
{
char *name;
@@ -30,27 +44,31 @@ struct _variable
struct _variable *next;
};
+/* Data structure representing a set of variables */
typedef struct _variable *VariableSpace;
+
VariableSpace CreateVariableSpace(void);
const char *GetVariable(VariableSpace space, const char *name);
-bool ParseVariableBool(const char *value, const char *name);
-int ParseVariableNum(const char *val,
- int defaultval,
- int faultval,
- bool allowtrail);
+bool ParseVariableBool(const char *value, const char *name,
+ bool *result);
+
+bool ParseVariableNum(const char *value, const char *name,
+ int *result);
+
int GetVariableNum(VariableSpace space,
const char *name,
int defaultval,
- int faultval,
- bool allowtrail);
+ int faultval);
void PrintVariables(VariableSpace space);
bool SetVariable(VariableSpace space, const char *name, const char *value);
-bool SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook hook);
+void SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook hook);
bool SetVariableBool(VariableSpace space, const char *name);
bool DeleteVariable(VariableSpace space, const char *name);
+void PsqlVarEnumError(const char *name, const char *value, const char *suggestions);
+
#endif /* VARIABLES_H */
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 464436ab3b..420825aa56 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -2,6 +2,15 @@
-- Tests for psql features that aren't closely connected to any
-- specific server features
--
+-- \set
+-- fail: invalid name
+\set invalid/name foo
+invalid variable name: "invalid/name"
+-- fail: invalid value for special variable
+\set AUTOCOMMIT foo
+unrecognized value "foo" for "AUTOCOMMIT": boolean expected
+\set FETCH_COUNT foo
+invalid value "foo" for "FETCH_COUNT": integer expected
-- \gset
select 10 as test01, 20 as test02, 'Hello' as test03 \gset pref01_
\echo :pref01_test01 :pref01_test02 :pref01_test03
@@ -9,7 +18,7 @@ select 10 as test01, 20 as test02, 'Hello' as test03 \gset pref01_
-- should fail: bad variable name
select 10 as "bad name"
\gset
-could not set variable "bad name"
+invalid variable name: "bad name"
-- multiple backslash commands in one line
select 1 as x, 2 as y \gset pref01_ \\ \echo :pref01_x
1
diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql
index 900aa7ee1e..79624b9193 100644
--- a/src/test/regress/sql/psql.sql
+++ b/src/test/regress/sql/psql.sql
@@ -3,6 +3,14 @@
-- specific server features
--
+-- \set
+
+-- fail: invalid name
+\set invalid/name foo
+-- fail: invalid value for special variable
+\set AUTOCOMMIT foo
+\set FETCH_COUNT foo
+
-- \gset
select 10 as test01, 20 as test02, 'Hello' as test03 \gset pref01_
--
cgit v1.2.3
From de16ab7238888b16825ad13f0bbe123632915e9b Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 30 Jan 2017 18:00:26 -0500
Subject: Invent pg_hba_file_rules view to show the content of pg_hba.conf.
This view is designed along the same lines as pg_file_settings, to wit
it shows what is currently in the file, not what the postmaster has
loaded as the active settings. That allows it to be used to pre-vet
edits before issuing SIGHUP. As with the earlier view, go out of our
way to allow errors in the file to be reflected in the view, to assist
that use-case.
(We might at some point invent a view to show the current active settings,
but this is not that patch; and it's not trivial to do.)
Haribabu Kommi, reviewed by Ashutosh Bapat, Michael Paquier, Simon Riggs,
and myself
Discussion: https://postgr.es/m/CAJrrPGerH4jiwpcXT1-46QXUDmNp2QDrG9+-Tek_xC8APHShYw@mail.gmail.com
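A hedged sketch of the pre-vetting workflow this enables:

    -- after editing pg_hba.conf, before signaling a reload:
    SELECT line_number, error
    FROM pg_hba_file_rules
    WHERE error IS NOT NULL;
    -- any rows returned point at lines that would fail to load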
---
doc/src/sgml/catalogs.sgml | 113 +++++
doc/src/sgml/client-auth.sgml | 18 +
src/backend/catalog/system_views.sql | 6 +
src/backend/libpq/hba.c | 824 ++++++++++++++++++++++++++++-----
src/include/catalog/catversion.h | 2 +-
src/include/catalog/pg_proc.h | 2 +
src/include/libpq/hba.h | 9 +-
src/test/regress/expected/rules.out | 10 +
src/test/regress/expected/sysviews.out | 7 +
src/test/regress/sql/sysviews.sql | 3 +
10 files changed, 873 insertions(+), 121 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 086fafc694..204b8cfd55 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -7808,6 +7808,11 @@
groups of database users
+
+ pg_hba_file_rules
+ summary of client authentication configuration file contents
+
+
pg_indexes
indexes
@@ -8408,6 +8413,114 @@
+
+ pg_hba_file_rules
+
+
+ pg_hba_file_rules
+
+
+
+ The view pg_hba_file_rules provides a summary of
+ the contents of the client authentication configuration
+ file, pg_hba.conf>. A row appears in this view for each
+ non-empty, non-comment line in the file, with annotations indicating
+ whether the rule could be applied successfully.
+
+
+
+ This view can be helpful for checking whether planned changes in the
+ authentication configuration file will work, or for diagnosing a previous
+ failure. Note that this view reports on the current> contents
+ of the file, not on what was last loaded by the server.
+
+
+
+ By default, the pg_hba_file_rules view can be read
+ only by superusers.
+
+
+
+ pg_hba_file_rules> Columns
+
+
+
+
+ Name
+ Type
+ Description
+
+
+
+
+ line_number
+ integer
+
+ Line number of this rule in pg_hba.conf>
+
+
+
+ type
+ text
+ Type of connection
+
+
+ database
+ text[]
+ List of database name(s) to which this rule applies
+
+
+ user_name
+ text[]
+ List of user and group name(s) to which this rule applies
+
+
+ address
+ text
+
+ Host name or IP address, or one
+ of all, samehost,
+ or samenet, or null for local connections
+
+
+
+ netmask
+ text
+ IP address mask, or null if not applicable
+
+
+ auth_method
+ text
+ Authentication method
+
+
+ options
+ text[]
+ Options specified for authentication method, if any
+
+
+ error
+ text
+
+ If not null, an error message indicating why this
+ line could not be processed
+
+
+
+
+
+
+
+ Usually, a row reflecting an incorrect entry will have values for only
+ the line_number> and error> fields.
+
+
+
+ See for more information about
+ client authentication configuration.
+
+
+
pg_indexes
diff --git a/doc/src/sgml/client-auth.sgml b/doc/src/sgml/client-auth.sgml
index dda5891900..231fc40fc3 100644
--- a/doc/src/sgml/client-auth.sgml
+++ b/doc/src/sgml/client-auth.sgml
@@ -597,6 +597,24 @@ hostnossl database user
re-read the file.
+
+
+ The preceding statement is not true on Microsoft Windows: there, any
+ changes in the pg_hba.conf file are immediately
+ applied by subsequent new connections.
+
+
+
+
+ The system view
+ pg_hba_file_rules
+ can be helpful for pre-testing changes to the pg_hba.conf>
+ file, or for diagnosing problems if loading of the file did not have the
+ desired effects. Rows in the view with
+ non-null error fields indicate problems in the
+ corresponding lines of the file.
+
+
To connect to a particular database, a user must not only pass the
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 4dfedf89b6..28be27a07e 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -459,6 +459,12 @@ CREATE VIEW pg_file_settings AS
REVOKE ALL on pg_file_settings FROM PUBLIC;
REVOKE EXECUTE ON FUNCTION pg_show_all_file_settings() FROM PUBLIC;
+CREATE VIEW pg_hba_file_rules AS
+ SELECT * FROM pg_hba_file_rules() AS A;
+
+REVOKE ALL on pg_hba_file_rules FROM PUBLIC;
+REVOKE EXECUTE ON FUNCTION pg_hba_file_rules() FROM PUBLIC;
+
CREATE VIEW pg_timezone_abbrevs AS
SELECT * FROM pg_timezone_abbrevs();
diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
index bbe0a888ff..5bb8eb93b1 100644
--- a/src/backend/libpq/hba.c
+++ b/src/backend/libpq/hba.c
@@ -25,15 +25,20 @@
#include
#include
+#include "access/htup_details.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
#include "common/ip.h"
+#include "funcapi.h"
#include "libpq/ifaddr.h"
#include "libpq/libpq.h"
+#include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "regex/regex.h"
#include "replication/walsender.h"
#include "storage/fd.h"
#include "utils/acl.h"
+#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -80,12 +85,15 @@ typedef struct HbaToken
* Each item in the "fields" list is a sub-list of HbaTokens.
* We don't emit a TokenizedLine for empty or all-comment lines,
* so "fields" is never NIL (nor are any of its sub-lists).
+ * Exception: if an error occurs during tokenization, we might
+ * have fields == NIL, in which case err_msg != NULL.
*/
typedef struct TokenizedLine
{
List *fields; /* List of lists of HbaTokens */
int line_num; /* Line number */
char *raw_line; /* Raw line text */
+ char *err_msg; /* Error message if any */
} TokenizedLine;
/*
@@ -106,13 +114,42 @@ static MemoryContext parsed_hba_context = NULL;
static List *parsed_ident_lines = NIL;
static MemoryContext parsed_ident_context = NULL;
+/*
+ * The following character array represents the names of the authentication
+ * methods that are supported by PostgreSQL.
+ *
+ * Note: keep this in sync with the UserAuth enum in hba.h.
+ */
+static const char *const UserAuthName[] =
+{
+ "reject",
+ "implicit reject", /* Not a user-visible option */
+ "trust",
+ "ident",
+ "password",
+ "md5",
+ "gss",
+ "sspi",
+ "pam",
+ "bsd",
+ "ldap",
+ "cert",
+ "radius",
+ "peer"
+};
+
static MemoryContext tokenize_file(const char *filename, FILE *file,
- List **tok_lines);
+ List **tok_lines, int elevel);
static List *tokenize_inc_file(List *tokens, const char *outer_filename,
- const char *inc_filename);
+ const char *inc_filename, int elevel, char **err_msg);
static bool parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline,
- int line_num);
+ int elevel, char **err_msg);
+static ArrayType *gethba_options(HbaLine *hba);
+static void fill_hba_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
+ int lineno, HbaLine *hba, const char *err_msg);
+static void fill_hba_view(Tuplestorestate *tuple_store, TupleDesc tupdesc);
+
/*
* isblank() exists in the ISO C99 spec, but it's not very portable yet,
@@ -126,32 +163,37 @@ pg_isblank(const char c)
/*
- * Grab one token out of the string pointed to by lineptr.
+ * Grab one token out of the string pointed to by *lineptr.
+ *
* Tokens are strings of non-blank
* characters bounded by blank characters, commas, beginning of line, and
* end of line. Blank means space or tab. Tokens can be delimited by
* double quotes (this allows the inclusion of blanks, but not newlines).
+ * Comments (started by an unquoted '#') are skipped.
+ *
+ * The token, if any, is returned at *buf (a buffer of size bufsz), and
+ * *lineptr is advanced past the token.
*
- * The token, if any, is returned at *buf (a buffer of size bufsz).
* Also, we set *initial_quote to indicate whether there was quoting before
* the first character. (We use that to prevent "@x" from being treated
* as a file inclusion request. Note that @"x" should be so treated;
* we want to allow that to support embedded spaces in file paths.)
+ *
* We set *terminating_comma to indicate whether the token is terminated by a
- * comma (which is not returned.)
+ * comma (which is not returned).
+ *
+ * In event of an error, log a message at ereport level elevel, and also
+ * set *err_msg to a string describing the error. Currently the only
+ * possible error is token too long for buf.
*
* If successful: store null-terminated token at *buf and return TRUE.
* If no more tokens on line: set *buf = '\0' and return FALSE.
- *
- * Leave file positioned at the character immediately after the token or EOF,
- * whichever comes first. If no more tokens on line, position the file to the
- * beginning of the next line or EOF, whichever comes first.
- *
- * Handle comments.
+ * If error: fill buf with truncated or misformatted token and return FALSE.
*/
static bool
-next_token(char **lineptr, char *buf, int bufsz, bool *initial_quote,
- bool *terminating_comma)
+next_token(char **lineptr, char *buf, int bufsz,
+ bool *initial_quote, bool *terminating_comma,
+ int elevel, char **err_msg)
{
int c;
char *start_buf = buf;
@@ -197,14 +239,15 @@ next_token(char **lineptr, char *buf, int bufsz, bool *initial_quote,
if (buf >= end_buf)
{
*buf = '\0';
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("authentication file token too long, skipping: \"%s\"",
start_buf)));
+ *err_msg = "authentication file token too long";
/* Discard remainder of line */
while ((c = (*(*lineptr)++)) != '\0' && c != '\n')
;
- break;
+ return false;
}
/* we do not pass back the comma in the token */
@@ -245,13 +288,17 @@ next_token(char **lineptr, char *buf, int bufsz, bool *initial_quote,
return (saw_quote || buf > start_buf);
}
+/*
+ * Construct a palloc'd HbaToken struct, copying the given string.
+ */
static HbaToken *
-make_hba_token(char *token, bool quoted)
+make_hba_token(const char *token, bool quoted)
{
HbaToken *hbatoken;
int toklen;
toklen = strlen(token);
+ /* we copy string into same palloc block as the struct */
hbatoken = (HbaToken *) palloc(sizeof(HbaToken) + toklen + 1);
hbatoken->string = (char *) hbatoken + sizeof(HbaToken);
hbatoken->quoted = quoted;
@@ -275,11 +322,20 @@ copy_hba_token(HbaToken *in)
/*
* Tokenize one HBA field from a line, handling file inclusion and comma lists.
*
- * The result is a List of HbaToken structs for each individual token,
+ * filename: current file's pathname (needed to resolve relative pathnames)
+ * *lineptr: current line pointer, which will be advanced past field
+ *
+ * In event of an error, log a message at ereport level elevel, and also
+ * set *err_msg to a string describing the error. Note that the result
+ * may be non-NIL anyway, so *err_msg must be tested to determine whether
+ * there was an error.
+ *
+ * The result is a List of HbaToken structs, one for each token in the field,
* or NIL if we reached EOL.
*/
static List *
-next_field_expand(const char *filename, char **lineptr)
+next_field_expand(const char *filename, char **lineptr,
+ int elevel, char **err_msg)
{
char buf[MAX_TOKEN];
bool trailing_comma;
@@ -288,15 +344,18 @@ next_field_expand(const char *filename, char **lineptr)
do
{
- if (!next_token(lineptr, buf, sizeof(buf), &initial_quote, &trailing_comma))
+ if (!next_token(lineptr, buf, sizeof(buf),
+ &initial_quote, &trailing_comma,
+ elevel, err_msg))
break;
/* Is this referencing a file? */
if (!initial_quote && buf[0] == '@' && buf[1] != '\0')
- tokens = tokenize_inc_file(tokens, filename, buf + 1);
+ tokens = tokenize_inc_file(tokens, filename, buf + 1,
+ elevel, err_msg);
else
tokens = lappend(tokens, make_hba_token(buf, initial_quote));
- } while (trailing_comma);
+ } while (trailing_comma && (*err_msg == NULL));
return tokens;
}
@@ -307,13 +366,21 @@ next_field_expand(const char *filename, char **lineptr)
*
* Opens and tokenises a file included from another HBA config file with @,
* and returns all values found therein as a flat list of HbaTokens. If a
- * @-token is found, recursively expand it. The given token list is used as
- * initial contents of list (so foo,bar,@baz does what you expect).
+ * @-token is found, recursively expand it. The newly read tokens are
+ * appended to "tokens" (so that foo,bar,@baz does what you expect).
+ * All new tokens are allocated in caller's memory context.
+ *
+ * In event of an error, log a message at ereport level elevel, and also
+ * set *err_msg to a string describing the error. Note that the result
+ * may be non-NIL anyway, so *err_msg must be tested to determine whether
+ * there was an error.
*/
static List *
tokenize_inc_file(List *tokens,
const char *outer_filename,
- const char *inc_filename)
+ const char *inc_filename,
+ int elevel,
+ char **err_msg)
{
char *inc_fullname;
FILE *inc_file;
@@ -340,16 +407,20 @@ tokenize_inc_file(List *tokens,
inc_file = AllocateFile(inc_fullname, "r");
if (inc_file == NULL)
{
- ereport(LOG,
+ int save_errno = errno;
+
+ ereport(elevel,
(errcode_for_file_access(),
errmsg("could not open secondary authentication file \"@%s\" as \"%s\": %m",
inc_filename, inc_fullname)));
+ *err_msg = psprintf("could not open secondary authentication file \"@%s\" as \"%s\": %s",
+ inc_filename, inc_fullname, strerror(save_errno));
pfree(inc_fullname);
return tokens;
}
/* There is possible recursion here if the file contains @ */
- linecxt = tokenize_file(inc_fullname, inc_file, &inc_lines);
+ linecxt = tokenize_file(inc_fullname, inc_file, &inc_lines, elevel);
FreeFile(inc_file);
pfree(inc_fullname);
@@ -360,6 +431,13 @@ tokenize_inc_file(List *tokens,
TokenizedLine *tok_line = (TokenizedLine *) lfirst(inc_line);
ListCell *inc_field;
+ /* If any line has an error, propagate that up to caller */
+ if (tok_line->err_msg)
+ {
+ *err_msg = pstrdup(tok_line->err_msg);
+ break;
+ }
+
foreach(inc_field, tok_line->fields)
{
List *inc_tokens = lfirst(inc_field);
@@ -383,13 +461,20 @@ tokenize_inc_file(List *tokens,
*
* The output is a list of TokenizedLine structs; see struct definition above.
*
- * filename must be the absolute path to the target file.
+ * filename: the absolute path to the target file
+ * file: the already-opened target file
+ * tok_lines: receives output list
+ * elevel: message logging level
+ *
+ * Errors are reported by logging messages at ereport level elevel and by
+ * adding TokenizedLine structs containing non-null err_msg fields to the
+ * output list.
*
* Return value is a memory context which contains all memory allocated by
* this function (it's a child of caller's context).
*/
static MemoryContext
-tokenize_file(const char *filename, FILE *file, List **tok_lines)
+tokenize_file(const char *filename, FILE *file, List **tok_lines, int elevel)
{
int line_number = 1;
MemoryContext linecxt;
@@ -407,16 +492,32 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines)
char rawline[MAX_LINE];
char *lineptr;
List *current_line = NIL;
+ char *err_msg = NULL;
if (!fgets(rawline, sizeof(rawline), file))
- break;
+ {
+ int save_errno = errno;
+
+ if (!ferror(file))
+ break; /* normal EOF */
+ /* I/O error! */
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", filename)));
+ err_msg = psprintf("could not read file \"%s\": %s",
+ filename, strerror(save_errno));
+ rawline[0] = '\0';
+ }
if (strlen(rawline) == MAX_LINE - 1)
+ {
/* Line too long! */
- ereport(ERROR,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("authentication file line too long"),
errcontext("line %d of configuration file \"%s\"",
line_number, filename)));
+ err_msg = "authentication file line too long";
+ }
/* Strip trailing linebreak from rawline */
lineptr = rawline + strlen(rawline) - 1;
@@ -425,18 +526,19 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines)
/* Parse fields */
lineptr = rawline;
- while (*lineptr)
+ while (*lineptr && err_msg == NULL)
{
List *current_field;
- current_field = next_field_expand(filename, &lineptr);
+ current_field = next_field_expand(filename, &lineptr,
+ elevel, &err_msg);
/* add field to line, unless we are at EOL or comment start */
if (current_field != NIL)
current_line = lappend(current_line, current_field);
}
/* Reached EOL; emit line to TokenizedLine list unless it's boring */
- if (current_line != NIL)
+ if (current_line != NIL || err_msg != NULL)
{
TokenizedLine *tok_line;
@@ -444,6 +546,7 @@ tokenize_file(const char *filename, FILE *file, List **tok_lines)
tok_line->fields = current_line;
tok_line->line_num = line_number;
tok_line->raw_line = pstrdup(rawline);
+ tok_line->err_msg = err_msg;
*tok_lines = lappend(*tok_lines, tok_line);
}
@@ -746,6 +849,10 @@ check_same_host_or_net(SockAddr *raddr, IPCompareMethod method)
/*
* Macros used to check and report on invalid configuration options.
+ * On error: log a message at level elevel, set *err_msg, and exit the function.
+ * These macros are not as general-purpose as they look, because they know
+ * what the calling function's error-exit value is.
+ *
* INVALID_AUTH_OPTION = reports when an option is specified for a method where it's
* not supported.
* REQUIRE_AUTH_OPTION = same as INVALID_AUTH_OPTION, except it also checks if the
@@ -754,44 +861,56 @@ check_same_host_or_net(SockAddr *raddr, IPCompareMethod method)
* MANDATORY_AUTH_ARG = check if a required option is set for an authentication method,
* reporting error if it's not.
*/
-#define INVALID_AUTH_OPTION(optname, validmethods) do {\
- ereport(LOG, \
+#define INVALID_AUTH_OPTION(optname, validmethods) \
+do { \
+ ereport(elevel, \
(errcode(ERRCODE_CONFIG_FILE_ERROR), \
/* translator: the second %s is a list of auth methods */ \
errmsg("authentication option \"%s\" is only valid for authentication methods %s", \
optname, _(validmethods)), \
errcontext("line %d of configuration file \"%s\"", \
line_num, HbaFileName))); \
+ *err_msg = psprintf("authentication option \"%s\" is only valid for authentication methods %s", \
+ optname, validmethods); \
return false; \
-} while (0);
+} while (0)
-#define REQUIRE_AUTH_OPTION(methodval, optname, validmethods) do {\
+#define REQUIRE_AUTH_OPTION(methodval, optname, validmethods) \
+do { \
if (hbaline->auth_method != methodval) \
INVALID_AUTH_OPTION(optname, validmethods); \
-} while (0);
+} while (0)
-#define MANDATORY_AUTH_ARG(argvar, argname, authname) do {\
- if (argvar == NULL) {\
- ereport(LOG, \
+#define MANDATORY_AUTH_ARG(argvar, argname, authname) \
+do { \
+ if (argvar == NULL) { \
+ ereport(elevel, \
(errcode(ERRCODE_CONFIG_FILE_ERROR), \
errmsg("authentication method \"%s\" requires argument \"%s\" to be set", \
authname, argname), \
errcontext("line %d of configuration file \"%s\"", \
line_num, HbaFileName))); \
+ *err_msg = psprintf("authentication method \"%s\" requires argument \"%s\" to be set", \
+ authname, argname); \
return NULL; \
} \
-} while (0);
+} while (0)
/*
+ * Macros for handling pg_ident problems.
+ * Much as above, but currently the message level is hardwired as LOG
+ * and there is no provision for an err_msg string.
+ *
* IDENT_FIELD_ABSENT:
- * Throw an error and exit the function if the given ident field ListCell is
+ * Log a message and exit the function if the given ident field ListCell is
* not populated.
*
* IDENT_MULTI_VALUE:
- * Throw an error and exit the function if the given ident token List has more
+ * Log a message and exit the function if the given ident token List has more
* than one element.
*/
-#define IDENT_FIELD_ABSENT(field) do {\
+#define IDENT_FIELD_ABSENT(field) \
+do { \
if (!field) { \
ereport(LOG, \
(errcode(ERRCODE_CONFIG_FILE_ERROR), \
@@ -799,9 +918,10 @@ check_same_host_or_net(SockAddr *raddr, IPCompareMethod method)
IdentFileName, line_num))); \
return NULL; \
} \
-} while (0);
+} while (0)
-#define IDENT_MULTI_VALUE(tokens) do {\
+#define IDENT_MULTI_VALUE(tokens) \
+do { \
if (tokens->length > 1) { \
ereport(LOG, \
(errcode(ERRCODE_CONFIG_FILE_ERROR), \
@@ -810,23 +930,26 @@ check_same_host_or_net(SockAddr *raddr, IPCompareMethod method)
line_num, IdentFileName))); \
return NULL; \
} \
-} while (0);
+} while (0)
/*
* Parse one tokenised line from the hba config file and store the result in a
* HbaLine structure.
*
- * Return NULL if parsing fails.
+ * If parsing fails, log a message at ereport level elevel, store an error
+ * string in tok_line->err_msg, and return NULL. (Some non-error conditions
+ * can also result in such messages.)
*
* Note: this function leaks memory when an error occurs. Caller is expected
* to have set a memory context that will be reset if this function returns
* NULL.
*/
static HbaLine *
-parse_hba_line(TokenizedLine *tok_line)
+parse_hba_line(TokenizedLine *tok_line, int elevel)
{
int line_num = tok_line->line_num;
+ char **err_msg = &tok_line->err_msg;
char *str;
struct addrinfo *gai_result;
struct addrinfo hints;
@@ -849,12 +972,13 @@ parse_hba_line(TokenizedLine *tok_line)
tokens = lfirst(field);
if (tokens->length > 1)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multiple values specified for connection type"),
errhint("Specify exactly one connection type per line."),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "multiple values specified for connection type";
return NULL;
}
token = linitial(tokens);
@@ -863,11 +987,12 @@ parse_hba_line(TokenizedLine *tok_line)
#ifdef HAVE_UNIX_SOCKETS
parsedline->conntype = ctLocal;
#else
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("local connections are not supported by this build"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "local connections are not supported by this build";
return NULL;
#endif
}
@@ -882,19 +1007,23 @@ parse_hba_line(TokenizedLine *tok_line)
/* Log a warning if SSL support is not active */
#ifdef USE_SSL
if (!EnableSSL)
- ereport(LOG,
+ {
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("hostssl record cannot match because SSL is disabled"),
errhint("Set ssl = on in postgresql.conf."),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "hostssl record cannot match because SSL is disabled";
+ }
#else
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("hostssl record cannot match because SSL is not supported by this build"),
errhint("Compile with --with-openssl to use SSL connections."),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "hostssl record cannot match because SSL is not supported by this build";
#endif
}
else if (token->string[4] == 'n') /* "hostnossl" */
@@ -909,12 +1038,13 @@ parse_hba_line(TokenizedLine *tok_line)
} /* record type */
else
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid connection type \"%s\"",
token->string),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid connection type \"%s\"", token->string);
return NULL;
}
@@ -922,11 +1052,12 @@ parse_hba_line(TokenizedLine *tok_line)
field = lnext(field);
if (!field)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("end-of-line before database specification"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "end-of-line before database specification";
return NULL;
}
parsedline->databases = NIL;
@@ -941,11 +1072,12 @@ parse_hba_line(TokenizedLine *tok_line)
field = lnext(field);
if (!field)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("end-of-line before role specification"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "end-of-line before role specification";
return NULL;
}
parsedline->roles = NIL;
@@ -962,22 +1094,24 @@ parse_hba_line(TokenizedLine *tok_line)
field = lnext(field);
if (!field)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("end-of-line before IP address specification"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "end-of-line before IP address specification";
return NULL;
}
tokens = lfirst(field);
if (tokens->length > 1)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multiple values specified for host address"),
errhint("Specify one address range per line."),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "multiple values specified for host address";
return NULL;
}
token = linitial(tokens);
@@ -1027,12 +1161,14 @@ parse_hba_line(TokenizedLine *tok_line)
parsedline->hostname = str;
else
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid IP address \"%s\": %s",
str, gai_strerror(ret)),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid IP address \"%s\": %s",
+ str, gai_strerror(ret));
if (gai_result)
pg_freeaddrinfo_all(hints.ai_family, gai_result);
return NULL;
@@ -1045,24 +1181,28 @@ parse_hba_line(TokenizedLine *tok_line)
{
if (parsedline->hostname)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("specifying both host name and CIDR mask is invalid: \"%s\"",
token->string),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("specifying both host name and CIDR mask is invalid: \"%s\"",
+ token->string);
return NULL;
}
if (pg_sockaddr_cidr_mask(&parsedline->mask, cidr_slash + 1,
parsedline->addr.ss_family) < 0)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid CIDR mask in address \"%s\"",
token->string),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid CIDR mask in address \"%s\"",
+ token->string);
return NULL;
}
pfree(str);
@@ -1074,22 +1214,24 @@ parse_hba_line(TokenizedLine *tok_line)
field = lnext(field);
if (!field)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("end-of-line before netmask specification"),
errhint("Specify an address range in CIDR notation, or provide a separate netmask."),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "end-of-line before netmask specification";
return NULL;
}
tokens = lfirst(field);
if (tokens->length > 1)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multiple values specified for netmask"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "multiple values specified for netmask";
return NULL;
}
token = linitial(tokens);
@@ -1098,12 +1240,14 @@ parse_hba_line(TokenizedLine *tok_line)
&hints, &gai_result);
if (ret || !gai_result)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid IP mask \"%s\": %s",
token->string, gai_strerror(ret)),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid IP mask \"%s\": %s",
+ token->string, gai_strerror(ret));
if (gai_result)
pg_freeaddrinfo_all(hints.ai_family, gai_result);
return NULL;
@@ -1115,11 +1259,12 @@ parse_hba_line(TokenizedLine *tok_line)
if (parsedline->addr.ss_family != parsedline->mask.ss_family)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("IP address and mask do not match"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "IP address and mask do not match";
return NULL;
}
}
@@ -1130,22 +1275,24 @@ parse_hba_line(TokenizedLine *tok_line)
field = lnext(field);
if (!field)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("end-of-line before authentication method"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "end-of-line before authentication method";
return NULL;
}
tokens = lfirst(field);
if (tokens->length > 1)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("multiple values specified for authentication type"),
errhint("Specify exactly one authentication type per line."),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "multiple values specified for authentication type";
return NULL;
}
token = linitial(tokens);
@@ -1177,11 +1324,12 @@ parse_hba_line(TokenizedLine *tok_line)
{
if (Db_user_namespace)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("MD5 authentication is not supported when \"db_user_namespace\" is enabled"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "MD5 authentication is not supported when \"db_user_namespace\" is enabled";
return NULL;
}
parsedline->auth_method = uaMD5;
@@ -1214,23 +1362,27 @@ parse_hba_line(TokenizedLine *tok_line)
parsedline->auth_method = uaRADIUS;
else
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid authentication method \"%s\"",
token->string),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid authentication method \"%s\"",
+ token->string);
return NULL;
}
if (unsupauth)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid authentication method \"%s\": not supported by this build",
token->string),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid authentication method \"%s\": not supported by this build",
+ token->string);
return NULL;
}
@@ -1246,22 +1398,24 @@ parse_hba_line(TokenizedLine *tok_line)
if (parsedline->conntype == ctLocal &&
parsedline->auth_method == uaGSS)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("gssapi authentication is not supported on local sockets"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "gssapi authentication is not supported on local sockets";
return NULL;
}
if (parsedline->conntype != ctLocal &&
parsedline->auth_method == uaPeer)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("peer authentication is only supported on local sockets"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "peer authentication is only supported on local sockets";
return NULL;
}
@@ -1274,11 +1428,12 @@ parse_hba_line(TokenizedLine *tok_line)
if (parsedline->conntype != ctHostSSL &&
parsedline->auth_method == uaCert)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("cert authentication is only supported on hostssl connections"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "cert authentication is only supported on hostssl connections";
return NULL;
}
@@ -1323,16 +1478,18 @@ parse_hba_line(TokenizedLine *tok_line)
/*
* Got something that's not a name=value pair.
*/
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("authentication option not in name=value format: %s", token->string),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("authentication option not in name=value format: %s",
+ token->string);
return NULL;
}
*val++ = '\0'; /* str now holds "name", val holds "value" */
- if (!parse_hba_auth_opt(str, val, parsedline, line_num))
+ if (!parse_hba_auth_opt(str, val, parsedline, elevel, err_msg))
/* parse_hba_auth_opt already logged the error message */
return NULL;
pfree(str);
@@ -1360,21 +1517,23 @@ parse_hba_line(TokenizedLine *tok_line)
parsedline->ldapbindpasswd ||
parsedline->ldapsearchattribute)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("cannot use ldapbasedn, ldapbinddn, ldapbindpasswd, ldapsearchattribute, or ldapurl together with ldapprefix"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "cannot use ldapbasedn, ldapbinddn, ldapbindpasswd, ldapsearchattribute, or ldapurl together with ldapprefix";
return NULL;
}
}
else if (!parsedline->ldapbasedn)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("authentication method \"ldap\" requires argument \"ldapbasedn\", \"ldapprefix\", or \"ldapsuffix\" to be set"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "authentication method \"ldap\" requires argument \"ldapbasedn\", \"ldapprefix\", or \"ldapsuffix\" to be set";
return NULL;
}
}
@@ -1399,11 +1558,15 @@ parse_hba_line(TokenizedLine *tok_line)
/*
* Parse one name-value pair as an authentication option into the given
* HbaLine. Return true if we successfully parse the option, false if we
- * encounter an error.
+ * encounter an error. In the event of an error, also log a message at
+ * ereport level elevel, and store a message string into *err_msg.
*/
static bool
-parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
+parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline,
+ int elevel, char **err_msg)
{
+ int line_num = hbaline->linenumber;
+
#ifdef USE_LDAP
hbaline->ldapscope = LDAP_SCOPE_SUBTREE;
#endif
@@ -1422,11 +1585,12 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
{
if (hbaline->conntype != ctHostSSL)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("clientcert can only be configured for \"hostssl\" rows"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "clientcert can only be configured for \"hostssl\" rows";
return false;
}
if (strcmp(val, "1") == 0)
@@ -1437,11 +1601,12 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
{
if (hbaline->auth_method == uaCert)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("clientcert can not be set to 0 when using \"cert\" authentication"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = "clientcert can not be set to 0 when using \"cert\" authentication";
return false;
}
hbaline->clientcert = false;
@@ -1473,17 +1638,21 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
rc = ldap_url_parse(val, &urldata);
if (rc != LDAP_SUCCESS)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not parse LDAP URL \"%s\": %s", val, ldap_err2string(rc))));
+ *err_msg = psprintf("could not parse LDAP URL \"%s\": %s",
+ val, ldap_err2string(rc));
return false;
}
if (strcmp(urldata->lud_scheme, "ldap") != 0)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("unsupported LDAP URL scheme: %s", urldata->lud_scheme)));
+ *err_msg = psprintf("unsupported LDAP URL scheme: %s",
+ urldata->lud_scheme);
ldap_free_urldesc(urldata);
return false;
}
@@ -1497,17 +1666,19 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
hbaline->ldapscope = urldata->lud_scope;
if (urldata->lud_filter)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("filters not supported in LDAP URLs")));
+ *err_msg = "filters not supported in LDAP URLs";
ldap_free_urldesc(urldata);
return false;
}
ldap_free_urldesc(urldata);
#else /* not OpenLDAP */
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("LDAP URLs not supported on this platform")));
+ *err_msg = "LDAP URLs not supported on this platform";
#endif /* not OpenLDAP */
}
else if (strcmp(name, "ldaptls") == 0)
@@ -1529,11 +1700,12 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
hbaline->ldapport = atoi(val);
if (hbaline->ldapport == 0)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid LDAP port number: \"%s\"", val),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid LDAP port number: \"%s\"", val);
return false;
}
}
@@ -1617,12 +1789,14 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
ret = pg_getaddrinfo_all(val, NULL, &hints, &gai_result);
if (ret || !gai_result)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("could not translate RADIUS server name \"%s\" to address: %s",
val, gai_strerror(ret)),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("could not translate RADIUS server name \"%s\" to address: %s",
+ val, gai_strerror(ret));
if (gai_result)
pg_freeaddrinfo_all(hints.ai_family, gai_result);
return false;
@@ -1636,11 +1810,12 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
hbaline->radiusport = atoi(val);
if (hbaline->radiusport == 0)
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("invalid RADIUS port number: \"%s\"", val),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("invalid RADIUS port number: \"%s\"", val);
return false;
}
}
@@ -1656,12 +1831,14 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
}
else
{
- ereport(LOG,
+ ereport(elevel,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("unrecognized authentication option name: \"%s\"",
name),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
+ *err_msg = psprintf("unrecognized authentication option name: \"%s\"",
+ name);
return false;
}
return true;
@@ -1794,7 +1971,7 @@ load_hba(void)
return false;
}
- linecxt = tokenize_file(HbaFileName, file, &hba_lines);
+ linecxt = tokenize_file(HbaFileName, file, &hba_lines, LOG);
FreeFile(file);
/* Now parse all the lines */
@@ -1808,21 +1985,22 @@ load_hba(void)
TokenizedLine *tok_line = (TokenizedLine *) lfirst(line);
HbaLine *newline;
- if ((newline = parse_hba_line(tok_line)) == NULL)
+ /* don't parse lines that already have errors */
+ if (tok_line->err_msg != NULL)
{
- /*
- * Parse error in the file, so indicate there's a problem. NB: a
- * problem in a line will free the memory for all previous lines
- * as well!
- */
- MemoryContextReset(hbacxt);
- new_parsed_lines = NIL;
+ ok = false;
+ continue;
+ }
+
+ if ((newline = parse_hba_line(tok_line, LOG)) == NULL)
+ {
+ /* Parse error; remember there's trouble */
ok = false;
/*
* Keep parsing the rest of the file so we can report errors on
- * more than the first row. Error has already been reported in the
- * parsing function, so no need to log it here.
+ * more than the first line. Error has already been logged, no
+ * need for more chatter here.
*/
continue;
}
@@ -1864,11 +2042,419 @@ load_hba(void)
return true;
}
+/*
+ * This macro specifies the maximum number of authentication options
+ * that are possible with any given authentication method that is supported.
+ * Currently LDAP supports 10 method-specific options; together with the
+ * generic map and clientcert options that makes 12, which is the most any
+ * single method can need.
+ */
+#define MAX_HBA_OPTIONS 12
+
+/*
+ * Create a text array listing the options specified in the HBA line.
+ * Return NULL if no options are specified.
+ */
+static ArrayType *
+gethba_options(HbaLine *hba)
+{
+ int noptions;
+ Datum options[MAX_HBA_OPTIONS];
+
+ noptions = 0;
+
+ if (hba->auth_method == uaGSS || hba->auth_method == uaSSPI)
+ {
+ if (hba->include_realm)
+ options[noptions++] =
+ CStringGetTextDatum("include_realm=true");
+
+ if (hba->krb_realm)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("krb_realm=%s", hba->krb_realm));
+ }
+
+ if (hba->usermap)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("map=%s", hba->usermap));
+
+ if (hba->clientcert)
+ options[noptions++] =
+ CStringGetTextDatum("clientcert=true");
+
+ if (hba->pamservice)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("pamservice=%s", hba->pamservice));
+
+ if (hba->auth_method == uaLDAP)
+ {
+ if (hba->ldapserver)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapserver=%s", hba->ldapserver));
+
+ if (hba->ldapport)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapport=%d", hba->ldapport));
+
+ if (hba->ldaptls)
+ options[noptions++] =
+ CStringGetTextDatum("ldaptls=true");
+
+ if (hba->ldapprefix)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapprefix=%s", hba->ldapprefix));
+
+ if (hba->ldapsuffix)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapsuffix=%s", hba->ldapsuffix));
+
+ if (hba->ldapbasedn)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapbasedn=%s", hba->ldapbasedn));
+
+ if (hba->ldapbinddn)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapbinddn=%s", hba->ldapbinddn));
+
+ if (hba->ldapbindpasswd)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapbindpasswd=%s",
+ hba->ldapbindpasswd));
+
+ if (hba->ldapsearchattribute)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapsearchattribute=%s",
+ hba->ldapsearchattribute));
+
+ if (hba->ldapscope)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapscope=%d", hba->ldapscope));
+ }
+
+ if (hba->auth_method == uaRADIUS)
+ {
+ if (hba->radiusserver)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiusserver=%s", hba->radiusserver));
+
+ if (hba->radiussecret)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiussecret=%s", hba->radiussecret));
+
+ if (hba->radiusidentifier)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiusidentifier=%s", hba->radiusidentifier));
+
+ if (hba->radiusport)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiusport=%d", hba->radiusport));
+ }
+
+ Assert(noptions <= MAX_HBA_OPTIONS);
+
+ if (noptions > 0)
+ return construct_array(options, noptions, TEXTOID, -1, false, 'i');
+ else
+ return NULL;
+}
+
+/* Number of columns in pg_hba_file_rules view */
+#define NUM_PG_HBA_FILE_RULES_ATTS 9
+
+/*
+ * fill_hba_line: build one row of pg_hba_file_rules view, add it to tuplestore
+ *
+ * tuple_store: where to store data
+ * tupdesc: tuple descriptor for the view
+ * lineno: pg_hba.conf line number (must always be valid)
+ * hba: parsed line data (can be NULL, in which case err_msg should be set)
+ * err_msg: error message (NULL if none)
+ *
+ * Note: leaks memory, but we don't care since this is run in a short-lived
+ * memory context.
+ */
+static void
+fill_hba_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
+ int lineno, HbaLine *hba, const char *err_msg)
+{
+ Datum values[NUM_PG_HBA_FILE_RULES_ATTS];
+ bool nulls[NUM_PG_HBA_FILE_RULES_ATTS];
+ char buffer[NI_MAXHOST];
+ HeapTuple tuple;
+ int index;
+ ListCell *lc;
+ const char *typestr;
+ const char *addrstr;
+ const char *maskstr;
+ ArrayType *options;
+
+ Assert(tupdesc->natts == NUM_PG_HBA_FILE_RULES_ATTS);
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+ index = 0;
+
+ /* line_number */
+ values[index++] = Int32GetDatum(lineno);
+
+ if (hba != NULL)
+ {
+ /* type */
+ /* Avoid a default: case so compiler will warn about missing cases */
+ typestr = NULL;
+ switch (hba->conntype)
+ {
+ case ctLocal:
+ typestr = "local";
+ break;
+ case ctHost:
+ typestr = "host";
+ break;
+ case ctHostSSL:
+ typestr = "hostssl";
+ break;
+ case ctHostNoSSL:
+ typestr = "hostnossl";
+ break;
+ }
+ if (typestr)
+ values[index++] = CStringGetTextDatum(typestr);
+ else
+ nulls[index++] = true;
+
+ /* database */
+ if (hba->databases)
+ {
+ /*
+ * Flatten HbaToken list to string list. It might seem that we
+ * should re-quote any quoted tokens, but that has been rejected
+ * on the grounds that it makes it harder to compare the array
+ * elements to other system catalogs. That makes entries like
+ * "all" or "samerole" formally ambiguous ... but users who name
+ * databases/roles that way are inflicting their own pain.
+ */
+ List *names = NIL;
+
+ foreach(lc, hba->databases)
+ {
+ HbaToken *tok = lfirst(lc);
+
+ names = lappend(names, tok->string);
+ }
+ values[index++] = PointerGetDatum(strlist_to_textarray(names));
+ }
+ else
+ nulls[index++] = true;
+
+ /* user */
+ if (hba->roles)
+ {
+ /* Flatten HbaToken list to string list; see comment above */
+ List *roles = NIL;
+
+ foreach(lc, hba->roles)
+ {
+ HbaToken *tok = lfirst(lc);
+
+ roles = lappend(roles, tok->string);
+ }
+ values[index++] = PointerGetDatum(strlist_to_textarray(roles));
+ }
+ else
+ nulls[index++] = true;
+
+ /* address and netmask */
+ /* Avoid a default: case so compiler will warn about missing cases */
+ addrstr = maskstr = NULL;
+ switch (hba->ip_cmp_method)
+ {
+ case ipCmpMask:
+ if (hba->hostname)
+ {
+ addrstr = hba->hostname;
+ }
+ else
+ {
+ if (pg_getnameinfo_all(&hba->addr, sizeof(hba->addr),
+ buffer, sizeof(buffer),
+ NULL, 0,
+ NI_NUMERICHOST) == 0)
+ {
+ clean_ipv6_addr(hba->addr.ss_family, buffer);
+ addrstr = pstrdup(buffer);
+ }
+ if (pg_getnameinfo_all(&hba->mask, sizeof(hba->mask),
+ buffer, sizeof(buffer),
+ NULL, 0,
+ NI_NUMERICHOST) == 0)
+ {
+ clean_ipv6_addr(hba->mask.ss_family, buffer);
+ maskstr = pstrdup(buffer);
+ }
+ }
+ break;
+ case ipCmpAll:
+ addrstr = "all";
+ break;
+ case ipCmpSameHost:
+ addrstr = "samehost";
+ break;
+ case ipCmpSameNet:
+ addrstr = "samenet";
+ break;
+ }
+ if (addrstr)
+ values[index++] = CStringGetTextDatum(addrstr);
+ else
+ nulls[index++] = true;
+ if (maskstr)
+ values[index++] = CStringGetTextDatum(maskstr);
+ else
+ nulls[index++] = true;
+
+ /*
+ * Make sure UserAuthName[] tracks additions to the UserAuth enum
+ */
+ StaticAssertStmt(lengthof(UserAuthName) == USER_AUTH_LAST + 1,
+ "UserAuthName[] must match the UserAuth enum");
+
+ /* auth_method */
+ values[index++] = CStringGetTextDatum(UserAuthName[hba->auth_method]);
+
+ /* options */
+ options = gethba_options(hba);
+ if (options)
+ values[index++] = PointerGetDatum(options);
+ else
+ nulls[index++] = true;
+ }
+ else
+ {
+ /* no parsing result, so set relevant fields to nulls */
+ memset(&nulls[1], true, (NUM_PG_HBA_FILE_RULES_ATTS - 2) * sizeof(bool));
+ }
+
+ /* error */
+ if (err_msg)
+ values[NUM_PG_HBA_FILE_RULES_ATTS - 1] = CStringGetTextDatum(err_msg);
+ else
+ nulls[NUM_PG_HBA_FILE_RULES_ATTS - 1] = true;
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ tuplestore_puttuple(tuple_store, tuple);
+}
+
+/*
+ * Read the pg_hba.conf file and fill the tuplestore with view records.
+ */
+static void
+fill_hba_view(Tuplestorestate *tuple_store, TupleDesc tupdesc)
+{
+ FILE *file;
+ List *hba_lines = NIL;
+ ListCell *line;
+ MemoryContext linecxt;
+ MemoryContext hbacxt;
+ MemoryContext oldcxt;
+
+ /*
+ * In the unlikely event that we can't open pg_hba.conf, we throw an
+ * error, rather than trying to report it via some sort of view entry.
+ * (Most other error conditions should result in a message in a view
+ * entry.)
+ */
+ file = AllocateFile(HbaFileName, "r");
+ if (file == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open configuration file \"%s\": %m",
+ HbaFileName)));
+
+ linecxt = tokenize_file(HbaFileName, file, &hba_lines, DEBUG3);
+ FreeFile(file);
+
+ /* Now parse all the lines */
+ hbacxt = AllocSetContextCreate(CurrentMemoryContext,
+ "hba parser context",
+ ALLOCSET_SMALL_SIZES);
+ oldcxt = MemoryContextSwitchTo(hbacxt);
+ foreach(line, hba_lines)
+ {
+ TokenizedLine *tok_line = (TokenizedLine *) lfirst(line);
+ HbaLine *hbaline = NULL;
+
+ /* don't parse lines that already have errors */
+ if (tok_line->err_msg == NULL)
+ hbaline = parse_hba_line(tok_line, DEBUG3);
+
+ fill_hba_line(tuple_store, tupdesc, tok_line->line_num,
+ hbaline, tok_line->err_msg);
+ }
+
+ /* Free tokenizer memory */
+ MemoryContextDelete(linecxt);
+ /* Free parse_hba_line memory */
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(hbacxt);
+}
+
+/*
+ * SQL-accessible SRF to return all the entries in the pg_hba.conf file.
+ */
+Datum
+pg_hba_file_rules(PG_FUNCTION_ARGS)
+{
+ Tuplestorestate *tuple_store;
+ TupleDesc tupdesc;
+ MemoryContext old_cxt;
+ ReturnSetInfo *rsi;
+
+ /*
+ * We must use the Materialize mode to be safe against HBA file changes
+ * while the cursor is open. It's also more efficient than having to look
+ * up our current position in the parsed list every time.
+ */
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ /* Check to see if caller supports us returning a tuplestore */
+ if (rsi == NULL || !IsA(rsi, ReturnSetInfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+ if (!(rsi->allowedModes & SFRM_Materialize))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("materialize mode required, but it is not " \
+ "allowed in this context")));
+
+ rsi->returnMode = SFRM_Materialize;
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ /* Build tuplestore to hold the result rows */
+ old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
+
+ tuple_store =
+ tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random,
+ false, work_mem);
+ rsi->setDesc = tupdesc;
+ rsi->setResult = tuple_store;
+
+ MemoryContextSwitchTo(old_cxt);
+
+ /* Fill the tuplestore */
+ fill_hba_view(tuple_store, tupdesc);
+
+ PG_RETURN_NULL();
+}
+
+
/*
* Parse one tokenised line from the ident config file and store the result in
* an IdentLine structure.
*
- * Return NULL if parsing fails.
+ * If parsing fails, log a message and return NULL.
*
* If ident_user is a regular expression (ie. begins with a slash), it is
* compiled and stored in IdentLine structure.
@@ -2170,7 +2756,7 @@ load_ident(void)
return false;
}
- linecxt = tokenize_file(IdentFileName, file, &ident_lines);
+ linecxt = tokenize_file(IdentFileName, file, &ident_lines, LOG);
FreeFile(file);
/* Now parse all the lines */
@@ -2183,26 +2769,22 @@ load_ident(void)
{
TokenizedLine *tok_line = (TokenizedLine *) lfirst(line_cell);
+ /* don't parse lines that already have errors */
+ if (tok_line->err_msg != NULL)
+ {
+ ok = false;
+ continue;
+ }
+
if ((newline = parse_ident_line(tok_line)) == NULL)
{
- /*
- * Parse error in the file, so indicate there's a problem. Free
- * all the memory and regular expressions of lines parsed so far.
- */
- foreach(parsed_line_cell, new_parsed_lines)
- {
- newline = (IdentLine *) lfirst(parsed_line_cell);
- if (newline->ident_user[0] == '/')
- pg_regfree(&newline->re);
- }
- MemoryContextReset(ident_context);
- new_parsed_lines = NIL;
+ /* Parse error; remember there's trouble */
ok = false;
/*
* Keep parsing the rest of the file so we can report errors on
- * more than the first row. Error has already been reported in the
- * parsing function, so no need to log it here.
+ * more than the first line. Error has already been logged, no
+ * need for more chatter here.
*/
continue;
}
@@ -2216,7 +2798,11 @@ load_ident(void)
if (!ok)
{
- /* File contained one or more errors, so bail out */
+ /*
+ * File contained one or more errors, so bail out, first being careful
+ * to clean up whatever we allocated. Most stuff will go away via
+ * MemoryContextDelete, but we have to clean up regexes explicitly.
+ */
foreach(parsed_line_cell, new_parsed_lines)
{
newline = (IdentLine *) lfirst(parsed_line_cell);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 417cfc36ec..82511c2db8 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201701251
+#define CATALOG_VERSION_NO 201701301
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 31c828a3f2..05652e86c2 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3076,6 +3076,8 @@ DATA(insert OID = 2084 ( pg_show_all_settings PGNSP PGUID 12 1 1000 0 0 f f f f
DESCR("SHOW ALL as a function");
DATA(insert OID = 3329 ( pg_show_all_file_settings PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 0 0 2249 "" "{25,23,23,25,25,16,25}" "{o,o,o,o,o,o,o}" "{sourcefile,sourceline,seqno,name,setting,applied,error}" _null_ _null_ show_all_file_settings _null_ _null_ _null_ ));
DESCR("show config file settings");
+DATA(insert OID = 3401 ( pg_hba_file_rules PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 0 0 2249 "" "{23,25,1009,1009,25,25,25,1009,25}" "{o,o,o,o,o,o,o,o,o}" "{line_number,type,database,user_name,address,netmask,auth_method,options,error}" _null_ _null_ pg_hba_file_rules _null_ _null_ _null_ ));
+DESCR("show pg_hba.conf rules");
DATA(insert OID = 1371 ( pg_lock_status PGNSP PGUID 12 1 1000 0 0 f f f f t t v s 0 0 2249 "" "{25,26,26,23,21,25,28,26,26,21,25,23,25,16,16}" "{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}" "{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath}" _null_ _null_ pg_lock_status _null_ _null_ _null_ ));
DESCR("view system lock information");
DATA(insert OID = 2561 ( pg_blocking_pids PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 1007 "23" _null_ _null_ _null_ _null_ _null_ pg_blocking_pids _null_ _null_ _null_ ));
diff --git a/src/include/libpq/hba.h b/src/include/libpq/hba.h
index dc7d2572ea..748a072854 100644
--- a/src/include/libpq/hba.h
+++ b/src/include/libpq/hba.h
@@ -16,10 +16,16 @@
#include "regex/regex.h"
+/*
+ * The following enum represents the authentication methods that
+ * are supported by PostgreSQL.
+ *
+ * Note: keep this in sync with the UserAuthName array in hba.c.
+ */
typedef enum UserAuth
{
uaReject,
- uaImplicitReject,
+ uaImplicitReject, /* Not a user-visible option */
uaTrust,
uaIdent,
uaPassword,
@@ -32,6 +38,7 @@ typedef enum UserAuth
uaCert,
uaRADIUS,
uaPeer
+#define USER_AUTH_LAST uaPeer /* Must be last value of this enum */
} UserAuth;
typedef enum IPCompareMethod
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 60abcad101..de5ae00970 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1338,6 +1338,16 @@ pg_group| SELECT pg_authid.rolname AS groname,
WHERE (pg_auth_members.roleid = pg_authid.oid)) AS grolist
FROM pg_authid
WHERE (NOT pg_authid.rolcanlogin);
+pg_hba_file_rules| SELECT a.line_number,
+ a.type,
+ a.database,
+ a.user_name,
+ a.address,
+ a.netmask,
+ a.auth_method,
+ a.options,
+ a.error
+ FROM pg_hba_file_rules() a(line_number, type, database, user_name, address, netmask, auth_method, options, error);
pg_indexes| SELECT n.nspname AS schemaname,
c.relname AS tablename,
i.relname AS indexname,
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 852a7c347e..d48abd7e09 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -39,6 +39,13 @@ select count(*) >= 0 as ok from pg_file_settings;
t
(1 row)
+-- There will surely be at least one rule
+select count(*) > 0 as ok from pg_hba_file_rules;
+ ok
+----
+ t
+(1 row)
+
-- There will surely be at least one active lock
select count(*) > 0 as ok from pg_locks;
ok
diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql
index 0941b6beac..28e412b735 100644
--- a/src/test/regress/sql/sysviews.sql
+++ b/src/test/regress/sql/sysviews.sql
@@ -20,6 +20,9 @@ select count(*) = 0 as ok from pg_cursors;
select count(*) >= 0 as ok from pg_file_settings;
+-- There will surely be at least one rule
+select count(*) > 0 as ok from pg_hba_file_rules;
+
-- There will surely be at least one active lock
select count(*) > 0 as ok from pg_locks;
--
cgit v1.2.3
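The dual-reporting pattern that runs through the hba.c hunks above — an ereport() at a caller-chosen elevel plus a psprintf()'d copy of the same text stashed in tok_line->err_msg — is the core of the patch: load_hba() passes LOG and reports loudly, while fill_hba_view() passes DEBUG3 and merely collects the messages for the pg_hba_file_rules view. The following standalone C sketch is not PostgreSQL code (the names and levels are illustrative stand-ins); it only mimics that contract with plain stdio to show how one function can serve both callers:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-ins for elevel values; the real code uses DEBUG3/LOG. */
enum { MY_DEBUG = 0, MY_LOG = 1 };

/* Report at the requested level AND capture the text for later display. */
static void
report(int elevel, char **err_msg, const char *fmt, ...)
{
    va_list ap;
    char    buf[256];

    va_start(ap, fmt);
    vsnprintf(buf, sizeof(buf), fmt, ap);
    va_end(ap);

    if (elevel >= MY_LOG)       /* chatty caller: emit immediately */
        fprintf(stderr, "LOG: %s\n", buf);
    *err_msg = strdup(buf);     /* quiet caller still gets the text */
}

int
main(void)
{
    char *err_msg = NULL;

    /* A view-building caller passes MY_DEBUG and reads err_msg later. */
    report(MY_DEBUG, &err_msg, "invalid connection type \"%s\"", "hots");
    printf("captured for view: %s\n", err_msg);
    free(err_msg);
    return 0;
}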
From 86322dc7e013b4062393dcbb74043db003e23ec5 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 1 Feb 2017 11:02:40 -0500
Subject: Improve psql's behavior for \set and \unset of its control variables.
This commit improves on the results of commit 511ae628f in two ways:
1. It restores the historical behavior that "\set FOO" is interpreted
as setting FOO to "on", if FOO is a boolean control variable. We
already found one test script that was expecting that behavior, and
the psql documentation certainly does nothing to discourage people
from assuming that would work, since it often says just "if FOO is set"
when describing the effects of a boolean variable. However, now this
case will result in actually setting FOO to "on", not an empty string.
2. It arranges for an "\unset" of a control variable to set the value
back to its default value, rather than becoming apparently undefined.
The control variables are also initialized that way at psql startup.
In combination, these things guarantee that a control variable always
has a displayable value that reflects what psql is actually doing.
That is a pretty substantial usability improvement.
The implementation involves adding a second type of variable hook function
that is able to replace a proposed new value (including NULL) with another
one. We could alternatively have complicated the API of the assign hook,
but this way seems better since many variables can share the same
substitution hook function.
Also document the actual behavior of these variables more fully,
including covering assorted behaviors that were there before but
never documented.
This patch also includes some minor cleanup that should have been in
511ae628f but was missed.
Patch by me, but it owes a lot to discussions with Daniel Vérité.
Discussion: https://postgr.es/m/9572.1485821620@sss.pgh.pa.us
---
doc/src/sgml/ref/psql-ref.sgml | 90 ++++++++++++++------
src/bin/psql/startup.c | 170 +++++++++++++++++++++++++++----------
src/bin/psql/variables.c | 144 +++++++++++++++----------------
src/bin/psql/variables.h | 35 +++++++-
src/test/regress/expected/psql.out | 17 ++++
src/test/regress/sql/psql.sql | 10 +++
6 files changed, 319 insertions(+), 147 deletions(-)
(limited to 'doc/src')
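Before the diffs, a minimal standalone C sketch of the two-hook chain this commit introduces (the names here are mine, not psql's; the real hooks appear in the startup.c hunks below): the substitute hook canonicalizes the proposed value, including the NULL produced by \unset, and the assign hook then updates derived program state from the canonical string:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef char *(*SubstituteHook) (char *newval);
typedef bool (*AssignHook) (const char *newval);

/* Like psql's bool_substitute_hook: \unset -> "off", bare \set -> "on". */
static char *
bool_subst(char *newval)
{
    if (newval == NULL)
        return strdup("off");
    if (newval[0] == '\0')
    {
        free(newval);
        return strdup("on");
    }
    return newval;
}

static bool autocommit;         /* the derived C state an assign hook keeps */

static bool
autocommit_assign(const char *newval)
{
    autocommit = (strcmp(newval, "on") == 0);
    return true;                /* value is already canonical; accept it */
}

static void
set_var(char *proposed, SubstituteHook shook, AssignHook ahook)
{
    char *v = shook(proposed);  /* substitute hook runs first, even on NULL */

    if (ahook(v))
        printf("AUTOCOMMIT = %s\n", v);
    free(v);
}

int
main(void)
{
    set_var(NULL, bool_subst, autocommit_assign);        /* \unset -> off */
    set_var(strdup(""), bool_subst, autocommit_assign);  /* \set   -> on  */
    return 0;
}

Because the substitute hook always yields a displayable string, a control variable can never appear "undefined" to the user, which is exactly the usability guarantee the commit message claims.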
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 4e51e90906..b9c8fccde4 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -455,8 +455,8 @@ EOF
any, by an equal sign on the command line. To unset a variable,
leave off the equal sign. To set a variable with an empty value,
use the equal sign but leave off the value. These assignments are
- done during a very early stage of start-up, so variables reserved
- for internal purposes might get overwritten later.
+ done during command line processing, so variables that reflect
+ connection state will get overwritten later.
@@ -2692,7 +2692,7 @@ lo_import 152801
class="parameter">name to value, or if more than one value
is given, to the concatenation of all of them. If only one
- argument is given, the variable is set with an empty value. To
+ argument is given, the variable is set to an empty-string value. To
unset a variable, use the \unset command.
@@ -2709,9 +2709,11 @@ lo_import 152801
- Although you are welcome to set any variable to anything you
- want, psql treats several variables
- as special. They are documented in the section about variables.
+ Certain variables are special, in that they
+ control psql's behavior or are
+ automatically set to reflect connection state. These variables are
+ documented in , below.
@@ -2835,6 +2837,14 @@ testdb=> \setenv LESS -imx4F
Unsets (deletes) the psql> variable name.
+
+
+ Most variables that control psql's behavior
+ cannot be unset; instead, an \unset> command is interpreted
+ as setting them to their default values.
+ See , below.
+
@@ -3053,7 +3063,7 @@ bar
If you call \set without a second argument, the
- variable is set, with an empty string as value. To unset (i.e., delete)
+ variable is set to an empty-string value. To unset (i.e., delete)
a variable, use the command \unset. To show the
values of all variables, call \set without any argument.
@@ -3082,8 +3092,23 @@ bar
By convention, all specially treated variables' names
consist of all upper-case ASCII letters (and possibly digits and
underscores). To ensure maximum compatibility in the future, avoid
- using such variable names for your own purposes. A list of all specially
- treated variables follows.
+ using such variable names for your own purposes.
+
+
+
+ Variables that control psql's behavior
+ generally cannot be unset or set to invalid values. An \unset>
+ command is allowed but is interpreted as setting the variable to its
+ default value. A \set> command without a second argument is
+ interpreted as setting the variable to on>, for control
+ variables that accept that value, and is rejected for others. Also,
+ control variables that accept the values on>
+ and off> will also accept other common spellings of Boolean
+ values, such as true> and false>.
+
+
+
+ The specially treated variables are:
@@ -3153,7 +3178,7 @@ bar
The name of the database you are currently connected to. This is
set every time you connect to a database (including program
- start-up), but can be unset.
+ start-up), but can be changed or unset.
@@ -3171,8 +3196,8 @@ bar
as it is sent to the server. The switch to select this behavior is -e.
If set to errors, then only
failed queries are displayed on standard error output. The switch
- for this behavior is -b. If unset, or if set to
- none, then no queries are displayed.
+ for this behavior is -b. If set to
+ none (the default), then no queries are displayed.
@@ -3187,8 +3212,9 @@ bar
PostgreSQL internals and provide
similar functionality in your own programs. (To select this behavior
on program start-up, use the switch -E.) If you set
- the variable to the value noexec, the queries are
+ this variable to the value noexec, the queries are
just shown but are not actually sent to the server and executed.
+ The default value is off>.
@@ -3200,7 +3226,7 @@ bar
The current client character set encoding.
This is set every time you connect to a database (including
program start-up), and when you change the encoding
- with \encoding>, but it can be unset.
+ with \encoding>, but it can be changed or unset.
@@ -3209,7 +3235,7 @@ bar
FETCH_COUNT
- If this variable is set to an integer value > 0,
+ If this variable is set to an integer value greater than zero,
the results of SELECT queries are fetched
and displayed in groups of that many rows, rather than the
default behavior of collecting the entire result set before
@@ -3220,6 +3246,13 @@ bar
Keep in mind that when using this feature, a query might
fail after having already displayed some rows.
+
+
+ FETCH_COUNT is ignored if it is unset or does not
+ have a positive value. It cannot be set to a value that is not
+ syntactically an integer.
+
+
Although you can use any output format with this feature,
@@ -3241,7 +3274,7 @@ bar
list. If set to a value of ignoredups, lines
matching the previous history line are not entered. A value of
ignoreboth combines the two options. If
- unset, or if set to none (the default), all lines
+ set to none (the default), all lines
read in interactive mode are saved on the history list.
@@ -3257,8 +3290,12 @@ bar
HISTFILE
- The file name that will be used to store the history list. The default
- value is ~/.psql_history. For example, putting:
+ The file name that will be used to store the history list. If unset,
+ the file name is taken from the PSQL_HISTORY
+ environment variable. If that is not set either, the default
+ is ~/.psql_history,
+ or %APPDATA%\postgresql\psql_history on Windows.
+ For example, putting:
\set HISTFILE ~/.psql_history- :DBNAME
@@ -3279,8 +3316,10 @@ bar
HISTSIZE
- The number of commands to store in the command history. The
- default value is 500.
+ The maximum number of commands to store in the command history.
+ If unset, at most 500 commands are stored by default.
+ If set to a value that is negative or not an integer, no limit is
+ applied.
@@ -3297,7 +3336,7 @@ bar
The database server host you are currently connected to. This is
set every time you connect to a database (including program
- start-up), but can be unset.
+ start-up), but can be changed or unset.
@@ -3350,7 +3389,7 @@ bar
generates an error, the error is ignored and the transaction
continues. When set to interactive>, such errors are only
ignored in interactive sessions, and not when reading script
- files. When unset or set to off>, a statement in a
+ files. When set to off> (the default), a statement in a
transaction block that generates an error aborts the entire
transaction. The error rollback mode works by issuing an
implicit SAVEPOINT> for you, just before each command
@@ -3385,7 +3424,7 @@ bar
The database server port to which you are currently connected.
This is set every time you connect to a database (including
- program start-up), but can be unset.
+ program start-up), but can be changed or unset.
@@ -3458,7 +3497,7 @@ bar
The database user you are currently connected as. This is set
every time you connect to a database (including program
- start-up), but can be unset.
+ start-up), but can be changed or unset.
@@ -3481,7 +3520,7 @@ bar
This variable is set at program start-up to
- reflect psql>'s version. It can be unset or changed.
+ reflect psql>'s version. It can be changed or unset.
@@ -4015,6 +4054,7 @@ PSQL_EDITOR_LINENUMBER_ARG='--line '
The location of the history file can be set explicitly via
+ the HISTFILE psql> variable or
the PSQL_HISTORY environment variable.
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index 0574b5bdfb..a3654e6272 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -166,10 +166,8 @@ main(int argc, char *argv[])
SetVariable(pset.vars, "VERSION", PG_VERSION_STR);
- /* Default values for variables */
+ /* Default values for variables (that don't match the result of \unset) */
SetVariableBool(pset.vars, "AUTOCOMMIT");
- SetVariable(pset.vars, "VERBOSITY", "default");
- SetVariable(pset.vars, "SHOW_CONTEXT", "errors");
SetVariable(pset.vars, "PROMPT1", DEFAULT_PROMPT1);
SetVariable(pset.vars, "PROMPT2", DEFAULT_PROMPT2);
SetVariable(pset.vars, "PROMPT3", DEFAULT_PROMPT3);
@@ -578,17 +576,13 @@ parse_psql_options(int argc, char *argv[], struct adhoc_opts * options)
if (!equal_loc)
{
if (!DeleteVariable(pset.vars, value))
- {
- fprintf(stderr, _("%s: could not delete variable \"%s\"\n"),
- pset.progname, value);
- exit(EXIT_FAILURE);
- }
+ exit(EXIT_FAILURE); /* error already printed */
}
else
{
*equal_loc = '\0';
if (!SetVariable(pset.vars, value, equal_loc + 1))
- exit(EXIT_FAILURE);
+ exit(EXIT_FAILURE); /* error already printed */
}
free(value);
@@ -777,11 +771,28 @@ showVersion(void)
/*
- * Assign hooks for psql variables.
+ * Substitute hooks and assign hooks for psql variables.
*
* This isn't an amazingly good place for them, but neither is anywhere else.
*/
+static char *
+bool_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ {
+ /* "\unset FOO" becomes "\set FOO off" */
+ newval = pg_strdup("off");
+ }
+ else if (newval[0] == '\0')
+ {
+ /* "\set FOO" becomes "\set FOO on" */
+ pg_free(newval);
+ newval = pg_strdup("on");
+ }
+ return newval;
+}
+
static bool
autocommit_hook(const char *newval)
{
@@ -822,12 +833,19 @@ fetch_count_hook(const char *newval)
return true;
}
+static char *
+echo_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("none");
+ return newval;
+}
+
static bool
echo_hook(const char *newval)
{
- if (newval == NULL)
- pset.echo = PSQL_ECHO_NONE;
- else if (pg_strcasecmp(newval, "queries") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "queries") == 0)
pset.echo = PSQL_ECHO_QUERIES;
else if (pg_strcasecmp(newval, "errors") == 0)
pset.echo = PSQL_ECHO_ERRORS;
@@ -846,9 +864,8 @@ echo_hook(const char *newval)
static bool
echo_hidden_hook(const char *newval)
{
- if (newval == NULL)
- pset.echo_hidden = PSQL_ECHO_HIDDEN_OFF;
- else if (pg_strcasecmp(newval, "noexec") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "noexec") == 0)
pset.echo_hidden = PSQL_ECHO_HIDDEN_NOEXEC;
else
{
@@ -868,9 +885,8 @@ echo_hidden_hook(const char *newval)
static bool
on_error_rollback_hook(const char *newval)
{
- if (newval == NULL)
- pset.on_error_rollback = PSQL_ERROR_ROLLBACK_OFF;
- else if (pg_strcasecmp(newval, "interactive") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "interactive") == 0)
pset.on_error_rollback = PSQL_ERROR_ROLLBACK_INTERACTIVE;
else
{
@@ -887,12 +903,19 @@ on_error_rollback_hook(const char *newval)
return true;
}
+static char *
+comp_keyword_case_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("preserve-upper");
+ return newval;
+}
+
static bool
comp_keyword_case_hook(const char *newval)
{
- if (newval == NULL)
- pset.comp_case = PSQL_COMP_CASE_PRESERVE_UPPER;
- else if (pg_strcasecmp(newval, "preserve-upper") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "preserve-upper") == 0)
pset.comp_case = PSQL_COMP_CASE_PRESERVE_UPPER;
else if (pg_strcasecmp(newval, "preserve-lower") == 0)
pset.comp_case = PSQL_COMP_CASE_PRESERVE_LOWER;
@@ -909,12 +932,19 @@ comp_keyword_case_hook(const char *newval)
return true;
}
+static char *
+histcontrol_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("none");
+ return newval;
+}
+
static bool
histcontrol_hook(const char *newval)
{
- if (newval == NULL)
- pset.histcontrol = hctl_none;
- else if (pg_strcasecmp(newval, "ignorespace") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "ignorespace") == 0)
pset.histcontrol = hctl_ignorespace;
else if (pg_strcasecmp(newval, "ignoredups") == 0)
pset.histcontrol = hctl_ignoredups;
@@ -952,12 +982,19 @@ prompt3_hook(const char *newval)
return true;
}
+static char *
+verbosity_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("default");
+ return newval;
+}
+
static bool
verbosity_hook(const char *newval)
{
- if (newval == NULL)
- pset.verbosity = PQERRORS_DEFAULT;
- else if (pg_strcasecmp(newval, "default") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "default") == 0)
pset.verbosity = PQERRORS_DEFAULT;
else if (pg_strcasecmp(newval, "terse") == 0)
pset.verbosity = PQERRORS_TERSE;
@@ -974,12 +1011,19 @@ verbosity_hook(const char *newval)
return true;
}
+static char *
+show_context_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("errors");
+ return newval;
+}
+
static bool
show_context_hook(const char *newval)
{
- if (newval == NULL)
- pset.show_context = PQSHOW_CONTEXT_ERRORS;
- else if (pg_strcasecmp(newval, "never") == 0)
+ Assert(newval != NULL); /* else substitute hook messed up */
+ if (pg_strcasecmp(newval, "never") == 0)
pset.show_context = PQSHOW_CONTEXT_NEVER;
else if (pg_strcasecmp(newval, "errors") == 0)
pset.show_context = PQSHOW_CONTEXT_ERRORS;
@@ -1002,20 +1046,52 @@ EstablishVariableSpace(void)
{
pset.vars = CreateVariableSpace();
- SetVariableAssignHook(pset.vars, "AUTOCOMMIT", autocommit_hook);
- SetVariableAssignHook(pset.vars, "ON_ERROR_STOP", on_error_stop_hook);
- SetVariableAssignHook(pset.vars, "QUIET", quiet_hook);
- SetVariableAssignHook(pset.vars, "SINGLELINE", singleline_hook);
- SetVariableAssignHook(pset.vars, "SINGLESTEP", singlestep_hook);
- SetVariableAssignHook(pset.vars, "FETCH_COUNT", fetch_count_hook);
- SetVariableAssignHook(pset.vars, "ECHO", echo_hook);
- SetVariableAssignHook(pset.vars, "ECHO_HIDDEN", echo_hidden_hook);
- SetVariableAssignHook(pset.vars, "ON_ERROR_ROLLBACK", on_error_rollback_hook);
- SetVariableAssignHook(pset.vars, "COMP_KEYWORD_CASE", comp_keyword_case_hook);
- SetVariableAssignHook(pset.vars, "HISTCONTROL", histcontrol_hook);
- SetVariableAssignHook(pset.vars, "PROMPT1", prompt1_hook);
- SetVariableAssignHook(pset.vars, "PROMPT2", prompt2_hook);
- SetVariableAssignHook(pset.vars, "PROMPT3", prompt3_hook);
- SetVariableAssignHook(pset.vars, "VERBOSITY", verbosity_hook);
- SetVariableAssignHook(pset.vars, "SHOW_CONTEXT", show_context_hook);
+ SetVariableHooks(pset.vars, "AUTOCOMMIT",
+ bool_substitute_hook,
+ autocommit_hook);
+ SetVariableHooks(pset.vars, "ON_ERROR_STOP",
+ bool_substitute_hook,
+ on_error_stop_hook);
+ SetVariableHooks(pset.vars, "QUIET",
+ bool_substitute_hook,
+ quiet_hook);
+ SetVariableHooks(pset.vars, "SINGLELINE",
+ bool_substitute_hook,
+ singleline_hook);
+ SetVariableHooks(pset.vars, "SINGLESTEP",
+ bool_substitute_hook,
+ singlestep_hook);
+ SetVariableHooks(pset.vars, "FETCH_COUNT",
+ NULL,
+ fetch_count_hook);
+ SetVariableHooks(pset.vars, "ECHO",
+ echo_substitute_hook,
+ echo_hook);
+ SetVariableHooks(pset.vars, "ECHO_HIDDEN",
+ bool_substitute_hook,
+ echo_hidden_hook);
+ SetVariableHooks(pset.vars, "ON_ERROR_ROLLBACK",
+ bool_substitute_hook,
+ on_error_rollback_hook);
+ SetVariableHooks(pset.vars, "COMP_KEYWORD_CASE",
+ comp_keyword_case_substitute_hook,
+ comp_keyword_case_hook);
+ SetVariableHooks(pset.vars, "HISTCONTROL",
+ histcontrol_substitute_hook,
+ histcontrol_hook);
+ SetVariableHooks(pset.vars, "PROMPT1",
+ NULL,
+ prompt1_hook);
+ SetVariableHooks(pset.vars, "PROMPT2",
+ NULL,
+ prompt2_hook);
+ SetVariableHooks(pset.vars, "PROMPT3",
+ NULL,
+ prompt3_hook);
+ SetVariableHooks(pset.vars, "VERBOSITY",
+ verbosity_substitute_hook,
+ verbosity_hook);
+ SetVariableHooks(pset.vars, "SHOW_CONTEXT",
+ show_context_substitute_hook,
+ show_context_hook);
}
diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c
index 91e4ae8095..b9b8fcb41d 100644
--- a/src/bin/psql/variables.c
+++ b/src/bin/psql/variables.c
@@ -52,6 +52,7 @@ CreateVariableSpace(void)
ptr = pg_malloc(sizeof *ptr);
ptr->name = NULL;
ptr->value = NULL;
+ ptr->substitute_hook = NULL;
ptr->assign_hook = NULL;
ptr->next = NULL;
@@ -101,11 +102,9 @@ ParseVariableBool(const char *value, const char *name, bool *result)
size_t len;
bool valid = true;
+ /* Treat "unset" as an empty string, which will lead to error below */
if (value == NULL)
- {
- *result = false; /* not set -> assume "off" */
- return valid;
- }
+ value = "";
len = strlen(value);
@@ -152,8 +151,10 @@ ParseVariableNum(const char *value, const char *name, int *result)
char *end;
long numval;
+ /* Treat "unset" as an empty string, which will lead to error below */
if (value == NULL)
- return false;
+ value = "";
+
errno = 0;
numval = strtol(value, &end, 0);
if (errno == 0 && *end == '\0' && end != value && numval == (int) numval)
@@ -235,13 +236,13 @@ SetVariable(VariableSpace space, const char *name, const char *value)
if (!valid_variable_name(name))
{
+ /* Deletion of non-existent variable is not an error */
+ if (!value)
+ return true;
psql_error("invalid variable name: \"%s\"\n", name);
return false;
}
- if (!value)
- return DeleteVariable(space, name);
-
for (previous = space, current = space->next;
current;
previous = current, current = current->next)
@@ -249,14 +250,20 @@ SetVariable(VariableSpace space, const char *name, const char *value)
if (strcmp(current->name, name) == 0)
{
/*
- * Found entry, so update, unless hook returns false. The hook
- * may need the passed value to have the same lifespan as the
- * variable, so allocate it right away, even though we'll have to
- * free it again if the hook returns false.
+ * Found entry, so update, unless assign hook returns false.
+ *
+ * We must duplicate the passed value to start with. This
+ * simplifies the API for substitute hooks. Moreover, some assign
+ * hooks assume that the passed value has the same lifespan as the
+ * variable. Having to free the string again on failure is a
+ * small price to pay for keeping these APIs simple.
*/
- char *new_value = pg_strdup(value);
+ char *new_value = value ? pg_strdup(value) : NULL;
bool confirmed;
+ if (current->substitute_hook)
+ new_value = (*current->substitute_hook) (new_value);
+
if (current->assign_hook)
confirmed = (*current->assign_hook) (new_value);
else
@@ -267,39 +274,61 @@ SetVariable(VariableSpace space, const char *name, const char *value)
if (current->value)
pg_free(current->value);
current->value = new_value;
+
+ /*
+ * If we deleted the value, and there are no hooks to
+ * remember, we can discard the variable altogether.
+ */
+ if (new_value == NULL &&
+ current->substitute_hook == NULL &&
+ current->assign_hook == NULL)
+ {
+ previous->next = current->next;
+ free(current->name);
+ free(current);
+ }
}
- else
+ else if (new_value)
pg_free(new_value); /* current->value is left unchanged */
return confirmed;
}
}
- /* not present, make new entry */
- current = pg_malloc(sizeof *current);
- current->name = pg_strdup(name);
- current->value = pg_strdup(value);
- current->assign_hook = NULL;
- current->next = NULL;
- previous->next = current;
+ /* not present, make new entry ... unless we were asked to delete */
+ if (value)
+ {
+ current = pg_malloc(sizeof *current);
+ current->name = pg_strdup(name);
+ current->value = pg_strdup(value);
+ current->substitute_hook = NULL;
+ current->assign_hook = NULL;
+ current->next = NULL;
+ previous->next = current;
+ }
return true;
}
/*
- * Attach an assign hook function to the named variable.
+ * Attach substitute and/or assign hook functions to the named variable.
+ * If you need only one hook, pass NULL for the other.
*
- * If the variable doesn't already exist, create it with value NULL,
- * just so we have a place to store the hook function. (Externally,
- * this isn't different from it not being defined.)
+ * If the variable doesn't already exist, create it with value NULL, just so
+ * we have a place to store the hook function(s). (The substitute hook might
+ * immediately change the NULL to something else; if not, this state is
+ * externally the same as the variable not being defined.)
*
- * The hook is immediately called on the variable's current value. This is
- * meant to let it update any derived psql state. If the hook doesn't like
- * the current value, it will print a message to that effect, but we'll ignore
- * it. Generally we do not expect any such failure here, because this should
- * get called before any user-supplied value is assigned.
+ * The substitute hook, if given, is immediately called on the variable's
+ * value. Then the assign hook, if given, is called on the variable's value.
+ * This is meant to let it update any derived psql state. If the assign hook
+ * doesn't like the current value, it will print a message to that effect,
+ * but we'll ignore it. Generally we do not expect any such failure here,
+ * because this should get called before any user-supplied value is assigned.
*/
void
-SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook hook)
+SetVariableHooks(VariableSpace space, const char *name,
+ VariableSubstituteHook shook,
+ VariableAssignHook ahook)
{
struct _variable *current,
*previous;
@@ -317,8 +346,12 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook
if (strcmp(current->name, name) == 0)
{
/* found entry, so update */
- current->assign_hook = hook;
- (void) (*hook) (current->value);
+ current->substitute_hook = shook;
+ current->assign_hook = ahook;
+ if (shook)
+ current->value = (*shook) (current->value);
+ if (ahook)
+ (void) (*ahook) (current->value);
return;
}
}
@@ -327,10 +360,14 @@ SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook
current = pg_malloc(sizeof *current);
current->name = pg_strdup(name);
current->value = NULL;
- current->assign_hook = hook;
+ current->substitute_hook = shook;
+ current->assign_hook = ahook;
current->next = NULL;
previous->next = current;
- (void) (*hook) (NULL);
+ if (shook)
+ current->value = (*shook) (current->value);
+ if (ahook)
+ (void) (*ahook) (current->value);
}
/*
@@ -351,42 +388,7 @@ SetVariableBool(VariableSpace space, const char *name)
bool
DeleteVariable(VariableSpace space, const char *name)
{
- struct _variable *current,
- *previous;
-
- if (!space)
- return true;
-
- for (previous = space, current = space->next;
- current;
- previous = current, current = current->next)
- {
- if (strcmp(current->name, name) == 0)
- {
- if (current->assign_hook)
- {
- /* Allow deletion only if hook is okay with NULL value */
- if (!(*current->assign_hook) (NULL))
- return false; /* message printed by hook */
- if (current->value)
- free(current->value);
- current->value = NULL;
- /* Don't delete entry, or we'd forget the hook function */
- }
- else
- {
- /* We can delete the entry as well as its value */
- if (current->value)
- free(current->value);
- previous->next = current->next;
- free(current->name);
- free(current);
- }
- return true;
- }
- }
-
- return true;
+ return SetVariable(space, name, NULL);
}
/*
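As a concrete illustration of the hook contract above (this sketch is not part of the patch; pset.vars is psql's global variable space, while "MYVAR" and both hook names are invented):

    #include "common/fe_memutils.h"    /* pg_strdup() */
    #include "settings.h"              /* pset */
    #include "variables.h"

    /* Substitute hook: map \unset (NULL) to an explicit default value. */
    static char *
    myvar_substitute_hook(char *newval)
    {
        if (newval == NULL)
            newval = pg_strdup("off");
        return newval;
    }

    /* Assign hook: mirror the value into derived C state; false vetoes. */
    static bool
    myvar_assign_hook(const char *newval)
    {
        /* e.g. ParseVariableBool(newval, "MYVAR", &some_c_flag) */
        return true;
    }

    static void
    setup_myvar(void)
    {
        SetVariableHooks(pset.vars, "MYVAR",
                         myvar_substitute_hook, myvar_assign_hook);
    }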
diff --git a/src/bin/psql/variables.h b/src/bin/psql/variables.h
index 274b4af553..84be780509 100644
--- a/src/bin/psql/variables.h
+++ b/src/bin/psql/variables.h
@@ -18,28 +18,52 @@
* prevent invalid values from being assigned, and can update internal C
* variables to keep them in sync with the variable's current value.
*
- * A hook function is called before any attempted assignment, with the
+ * An assign hook function is called before any attempted assignment, with the
* proposed new value of the variable (or with NULL, if an \unset is being
* attempted). If it returns false, the assignment doesn't occur --- it
* should print an error message with psql_error() to tell the user why.
*
- * When a hook function is installed with SetVariableAssignHook(), it is
+ * When an assign hook function is installed with SetVariableHooks(), it is
* called with the variable's current value (or with NULL, if it wasn't set
* yet). But its return value is ignored in this case. The hook should be
* set before any possibly-invalid value can be assigned.
*/
typedef bool (*VariableAssignHook) (const char *newval);
+/*
+ * Variables can also be given "substitute hook" functions. The substitute
+ * hook can replace values (including NULL) with other values, allowing
+ * normalization of variable contents. For example, for a boolean variable,
+ * we wish to interpret "\unset FOO" as "\set FOO off", and we can do that
+ * by installing a substitute hook. (We can use the same substitute hook
+ * for all bool or nearly-bool variables, which is why this responsibility
+ * isn't part of the assign hook.)
+ *
+ * The substitute hook is called before any attempted assignment, and before
+ * the assign hook if any, passing the proposed new value of the variable as a
+ * malloc'd string (or NULL, if an \unset is being attempted). It can return
+ * the same value, or a different malloc'd string, or modify the string
+ * in-place. It should free the passed-in value if it's not returning it.
+ * The substitute hook generally should not complain about erroneous values;
+ * that's a job for the assign hook.
+ *
+ * When a substitute hook is installed with SetVariableHooks(), it is applied
+ * to the variable's current value (typically NULL, if it wasn't set yet).
+ * That also happens before applying the assign hook.
+ */
+typedef char *(*VariableSubstituteHook) (char *newval);
+
/*
* Data structure representing one variable.
*
* Note: if value == NULL then the variable is logically unset, but we are
- * keeping the struct around so as not to forget about its hook function.
+ * keeping the struct around so as not to forget about its hook function(s).
*/
struct _variable
{
char *name;
char *value;
+ VariableSubstituteHook substitute_hook;
VariableAssignHook assign_hook;
struct _variable *next;
};
@@ -65,10 +89,13 @@ int GetVariableNum(VariableSpace space,
void PrintVariables(VariableSpace space);
bool SetVariable(VariableSpace space, const char *name, const char *value);
-void SetVariableAssignHook(VariableSpace space, const char *name, VariableAssignHook hook);
bool SetVariableBool(VariableSpace space, const char *name);
bool DeleteVariable(VariableSpace space, const char *name);
+void SetVariableHooks(VariableSpace space, const char *name,
+ VariableSubstituteHook shook,
+ VariableAssignHook ahook);
+
void PsqlVarEnumError(const char *name, const char *value, const char *suggestions);
#endif /* VARIABLES_H */
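Note how the rewritten DeleteVariable() interacts with these hooks: \unset is now just an assignment of NULL, which a substitute hook may rewrite. A rough sketch of the resulting control flow (SetVariable's body is not shown in this patch, so the middle steps are paraphrased from the contract above):

    DeleteVariable(space, "FOO");     /* == SetVariable(space, "FOO", NULL) */

    /* ...inside the assignment, approximately: */
    char *value = NULL;                        /* NULL = unset requested */
    if (var->substitute_hook)
        value = var->substitute_hook(value);   /* may return pg_strdup("off") */
    if (var->assign_hook && !var->assign_hook(value))
        ;                                      /* vetoed; hook printed why */

This is what the regression test additions below rely on: for ON_ERROR_ROLLBACK, \unset lands on "off" rather than leaving the variable undefined.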
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 420825aa56..026a4f0c83 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -11,6 +11,23 @@ invalid variable name: "invalid/name"
unrecognized value "foo" for "AUTOCOMMIT": boolean expected
\set FETCH_COUNT foo
invalid value "foo" for "FETCH_COUNT": integer expected
+-- check handling of built-in boolean variable
+\echo :ON_ERROR_ROLLBACK
+off
+\set ON_ERROR_ROLLBACK
+\echo :ON_ERROR_ROLLBACK
+on
+\set ON_ERROR_ROLLBACK foo
+unrecognized value "foo" for "ON_ERROR_ROLLBACK"
+Available values are: on, off, interactive.
+\echo :ON_ERROR_ROLLBACK
+on
+\set ON_ERROR_ROLLBACK on
+\echo :ON_ERROR_ROLLBACK
+on
+\unset ON_ERROR_ROLLBACK
+\echo :ON_ERROR_ROLLBACK
+off
-- \gset
select 10 as test01, 20 as test02, 'Hello' as test03 \gset pref01_
\echo :pref01_test01 :pref01_test02 :pref01_test03
diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql
index 79624b9193..d823d11b95 100644
--- a/src/test/regress/sql/psql.sql
+++ b/src/test/regress/sql/psql.sql
@@ -10,6 +10,16 @@
-- fail: invalid value for special variable
\set AUTOCOMMIT foo
\set FETCH_COUNT foo
+-- check handling of built-in boolean variable
+\echo :ON_ERROR_ROLLBACK
+\set ON_ERROR_ROLLBACK
+\echo :ON_ERROR_ROLLBACK
+\set ON_ERROR_ROLLBACK foo
+\echo :ON_ERROR_ROLLBACK
+\set ON_ERROR_ROLLBACK on
+\echo :ON_ERROR_ROLLBACK
+\unset ON_ERROR_ROLLBACK
+\echo :ON_ERROR_ROLLBACK
-- \gset
--
cgit v1.2.3
From f1169ab501ce90e035a7c6489013a1d4c250ac92 Mon Sep 17 00:00:00 2001
From: Andrew Dunstan
Date: Wed, 1 Feb 2017 17:52:35 -0500
Subject: Don't count background workers against a user's connection limit.
Doing so doesn't seem to be within the purpose of the per-user
connection limits, and has particularly unfortunate effects in
conjunction with parallel queries.
Backpatch to 9.6 where parallel queries were introduced.
David Rowley, reviewed by Robert Haas and Albe Laurenz.
---
doc/src/sgml/ref/create_database.sgml | 3 ++-
doc/src/sgml/ref/create_role.sgml | 5 ++++-
src/backend/access/transam/twophase.c | 1 +
src/backend/storage/ipc/procarray.c | 34 ++++++++++++++++++++++++++++++++++
src/backend/storage/lmgr/proc.c | 2 ++
src/backend/utils/init/postinit.c | 2 +-
src/include/storage/proc.h | 2 ++
src/include/storage/procarray.h | 1 +
8 files changed, 47 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
index cf33746c1e..cf0d53b301 100644
--- a/doc/src/sgml/ref/create_database.sgml
+++ b/doc/src/sgml/ref/create_database.sgml
@@ -258,7 +258,8 @@ CREATE DATABASE name
The CONNECTION LIMIT> option is only enforced approximately;
if two new sessions start at about the same time when just one
connection slot> remains for the database, it is possible that
- both will fail. Also, the limit is not enforced against superusers.
+ both will fail. Also, the limit is not enforced against superusers or
+ background worker processes.
diff --git a/doc/src/sgml/ref/create_role.sgml b/doc/src/sgml/ref/create_role.sgml
index a3b8ed9ab4..2ae576ede6 100644
--- a/doc/src/sgml/ref/create_role.sgml
+++ b/doc/src/sgml/ref/create_role.sgml
@@ -198,7 +198,10 @@ CREATE ROLE name [ [ WITH ]
If role can log in, this specifies how many concurrent connections
- the role can make. -1 (the default) means no limit.
+ the role can make. -1 (the default) means no limit. Note that only
+ normal connections are counted towards this limit; prepared
+ transactions and background worker connections do not count
+ against it.
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 5b72c1dcf5..6fde2bd8bf 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -420,6 +420,7 @@ MarkAsPreparing(TransactionId xid, const char *gid,
proc->backendId = InvalidBackendId;
proc->databaseId = databaseid;
proc->roleId = owner;
+ proc->isBackgroundWorker = false;
proc->lwWaiting = false;
proc->lwWaitMode = 0;
proc->waitLock = NULL;
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 3f47b984ee..cd14667c16 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -2744,6 +2744,38 @@ CountDBBackends(Oid databaseid)
return count;
}
+/*
+ * CountDBConnections --- counts database backends ignoring any background
+ * worker processes
+ */
+int
+CountDBConnections(Oid databaseid)
+{
+ ProcArrayStruct *arrayP = procArray;
+ int count = 0;
+ int index;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ for (index = 0; index < arrayP->numProcs; index++)
+ {
+ int pgprocno = arrayP->pgprocnos[index];
+ volatile PGPROC *proc = &allProcs[pgprocno];
+
+ if (proc->pid == 0)
+ continue; /* do not count prepared xacts */
+ if (proc->isBackgroundWorker)
+ continue; /* do not count background workers */
+ if (!OidIsValid(databaseid) ||
+ proc->databaseId == databaseid)
+ count++;
+ }
+
+ LWLockRelease(ProcArrayLock);
+
+ return count;
+}
+
/*
* CancelDBBackends --- cancel backends that are using specified database
*/
@@ -2803,6 +2835,8 @@ CountUserBackends(Oid roleid)
if (proc->pid == 0)
continue; /* do not count prepared xacts */
+ if (proc->isBackgroundWorker)
+ continue; /* do not count background workers */
if (proc->roleId == roleid)
count++;
}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 1b836f7c0a..8f467bef50 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -370,6 +370,7 @@ InitProcess(void)
MyProc->backendId = InvalidBackendId;
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
+ MyProc->isBackgroundWorker = IsBackgroundWorker;
MyPgXact->delayChkpt = false;
MyPgXact->vacuumFlags = 0;
/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
@@ -542,6 +543,7 @@ InitAuxiliaryProcess(void)
MyProc->backendId = InvalidBackendId;
MyProc->databaseId = InvalidOid;
MyProc->roleId = InvalidOid;
+ MyProc->isBackgroundWorker = IsBackgroundWorker;
MyPgXact->delayChkpt = false;
MyPgXact->vacuumFlags = 0;
MyProc->lwWaiting = false;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 21fdc6df6b..4d0a2a7bed 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -350,7 +350,7 @@ CheckMyDatabase(const char *name, bool am_superuser)
*/
if (dbform->datconnlimit >= 0 &&
!am_superuser &&
- CountDBBackends(MyDatabaseId) > dbform->datconnlimit)
+ CountDBConnections(MyDatabaseId) > dbform->datconnlimit)
ereport(FATAL,
(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
errmsg("too many connections for database \"%s\"",
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 398fa8afde..5f38fa6b4f 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -103,6 +103,8 @@ struct PGPROC
Oid databaseId; /* OID of database this backend is using */
Oid roleId; /* OID of role using this backend */
+ bool isBackgroundWorker; /* true if background worker. */
+
/*
* While in hot standby mode, shows that a conflict signal has been sent
* for the current transaction. Set/cleared while holding ProcArrayLock,
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 0d5027fa64..9d5a13eb3b 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -73,6 +73,7 @@ extern pid_t CancelVirtualTransaction(VirtualTransactionId vxid, ProcSignalReaso
extern bool MinimumActiveBackends(int min);
extern int CountDBBackends(Oid databaseid);
+extern int CountDBConnections(Oid databaseid);
extern void CancelDBBackends(Oid databaseid, ProcSignalReason sigmode, bool conflictPending);
extern int CountUserBackends(Oid roleid);
extern bool CountOtherDBBackends(Oid databaseId,
--
cgit v1.2.3
From 08bf6e529587e1e9075d013d859af2649c32a511 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Thu, 2 Feb 2017 14:12:58 -0500
Subject: pageinspect: Support hash indexes.
Patch by Jesper Pedersen and Ashutosh Sharma, with some error handling
improvements by me. Tests from Peter Eisentraut. Reviewed by Álvaro
Herrera, Michael Paquier, Jesper Pedersen, Jeff Janes, Peter
Eisentraut, Amit Kapila, Mithun Cy, and me.
Discussion: http://postgr.es/m/e2ac6c58-b93f-9dd9-f4e6-d6d30add7fdf@redhat.com
---
contrib/pageinspect/Makefile | 12 +-
contrib/pageinspect/expected/hash.out | 150 +++++++
contrib/pageinspect/hashfuncs.c | 559 ++++++++++++++++++++++++++
contrib/pageinspect/pageinspect--1.5--1.6.sql | 77 ++++
contrib/pageinspect/pageinspect.control | 2 +-
contrib/pageinspect/sql/hash.sql | 49 +++
doc/src/sgml/pageinspect.sgml | 144 +++++++
src/backend/access/hash/hashovfl.c | 8 +-
src/include/access/hash.h | 4 +
9 files changed, 995 insertions(+), 10 deletions(-)
create mode 100644 contrib/pageinspect/expected/hash.out
create mode 100644 contrib/pageinspect/hashfuncs.c
create mode 100644 contrib/pageinspect/pageinspect--1.5--1.6.sql
create mode 100644 contrib/pageinspect/sql/hash.sql
(limited to 'doc/src')
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index 87a28e98c2..0a3cbeeb10 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -2,16 +2,16 @@
MODULE_big = pageinspect
OBJS = rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o \
- brinfuncs.o ginfuncs.o $(WIN32RES)
+ brinfuncs.o ginfuncs.o hashfuncs.o $(WIN32RES)
EXTENSION = pageinspect
-DATA = pageinspect--1.5.sql pageinspect--1.4--1.5.sql \
- pageinspect--1.3--1.4.sql pageinspect--1.2--1.3.sql \
- pageinspect--1.1--1.2.sql pageinspect--1.0--1.1.sql \
- pageinspect--unpackaged--1.0.sql
+DATA = pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
+ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \
+ pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \
+ pageinspect--1.0--1.1.sql pageinspect--unpackaged--1.0.sql
PGFILEDESC = "pageinspect - functions to inspect contents of database pages"
-REGRESS = page btree brin gin
+REGRESS = page btree brin gin hash
ifdef USE_PGXS
PG_CONFIG = pg_config
diff --git a/contrib/pageinspect/expected/hash.out b/contrib/pageinspect/expected/hash.out
new file mode 100644
index 0000000000..3abc887800
--- /dev/null
+++ b/contrib/pageinspect/expected/hash.out
@@ -0,0 +1,150 @@
+CREATE TABLE test_hash (a int, b text);
+INSERT INTO test_hash VALUES (1, 'one');
+CREATE INDEX test_hash_a_idx ON test_hash USING hash (a);
+WARNING: hash indexes are not WAL-logged and their use is discouraged
+\x
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0));
+-[ RECORD 1 ]--+---------
+hash_page_type | metapage
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4));
+-[ RECORD 1 ]--+-------
+hash_page_type | bucket
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5));
+-[ RECORD 1 ]--+-------
+hash_page_type | bitmap
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6));
+ERROR: block number 6 is out of range for relation "test_hash_a_idx"
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0);
+ERROR: page is not an overflow page
+DETAIL: Expected 00000001, got 00000008.
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1);
+ERROR: page is not an overflow page
+DETAIL: Expected 00000001, got 00000002.
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2);
+ERROR: page is not an overflow page
+DETAIL: Expected 00000001, got 00000002.
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3);
+ERROR: page is not an overflow page
+DETAIL: Expected 00000001, got 00000002.
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4);
+ERROR: page is not an overflow page
+DETAIL: Expected 00000001, got 00000002.
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5);
+ERROR: page is not an overflow page
+DETAIL: Expected 00000001, got 00000004.
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
+-[ RECORD 1 ]----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+magic | 105121344
+version | 2
+ntuples | 1
+ffactor | 307
+bsize | 8152
+bmsize | 4096
+bmshift | 15
+maxbucket | 3
+highmask | 7
+lowmask | 3
+ovflpoint | 2
+firstfree | 0
+nmaps | 1
+procid | 450
+spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 1));
+ERROR: page is not a hash meta page
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 2));
+ERROR: page is not a hash meta page
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 3));
+ERROR: page is not a hash meta page
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 4));
+ERROR: page is not a hash meta page
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash meta page
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 0));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 1));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+free_size | 8148
+hasho_prevblkno | 4294967295
+hasho_nextblkno | 4294967295
+hasho_bucket | 0
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 2));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+free_size | 8148
+hasho_prevblkno | 4294967295
+hasho_nextblkno | 4294967295
+hasho_bucket | 1
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]---+-----------
+live_items | 1
+dead_items | 0
+page_size | 8192
+free_size | 8128
+hasho_prevblkno | 4294967295
+hasho_nextblkno | 4294967295
+hasho_bucket | 2
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 4));
+-[ RECORD 1 ]---+-----------
+live_items | 0
+dead_items | 0
+page_size | 8192
+free_size | 8148
+hasho_prevblkno | 4294967295
+hasho_nextblkno | 4294967295
+hasho_bucket | 3
+hasho_flag | 2
+hasho_page_id | 65408
+
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0));
+ERROR: page is not a hash bucket or overflow page
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3));
+-[ RECORD 1 ]----------
+itemoffset | 1
+ctid | (0,1)
+data | 2389907270
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));
+(0 rows)
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5));
+ERROR: page is not a hash bucket or overflow page
+DROP TABLE test_hash;
diff --git a/contrib/pageinspect/hashfuncs.c b/contrib/pageinspect/hashfuncs.c
new file mode 100644
index 0000000000..5812afe936
--- /dev/null
+++ b/contrib/pageinspect/hashfuncs.c
@@ -0,0 +1,559 @@
+/*
+ * hashfuncs.c
+ * Functions to investigate the content of HASH indexes
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pageinspect/hashfuncs.c
+ */
+
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_am.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+
+PG_FUNCTION_INFO_V1(hash_page_type);
+PG_FUNCTION_INFO_V1(hash_page_stats);
+PG_FUNCTION_INFO_V1(hash_page_items);
+PG_FUNCTION_INFO_V1(hash_bitmap_info);
+PG_FUNCTION_INFO_V1(hash_metapage_info);
+
+#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
+
+/* ------------------------------------------------
+ * structure for single hash page statistics
+ * ------------------------------------------------
+ */
+typedef struct HashPageStat
+{
+ uint16 live_items;
+ uint16 dead_items;
+ uint16 page_size;
+ uint16 free_size;
+
+ /* opaque data */
+ BlockNumber hasho_prevblkno;
+ BlockNumber hasho_nextblkno;
+ Bucket hasho_bucket;
+ uint16 hasho_flag;
+ uint16 hasho_page_id;
+} HashPageStat;
+
+
+/*
+ * Verify that the given bytea contains a HASH page, or die in the attempt.
+ * A pointer to the page is returned.
+ */
+static Page
+verify_hash_page(bytea *raw_page, int flags)
+{
+ Page page;
+ int raw_page_size;
+ int pagetype;
+ HashPageOpaque pageopaque;
+
+ raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+
+ if (raw_page_size != BLCKSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid page size"),
+ errdetail("Expected size %d, got %d",
+ BLCKSZ, raw_page_size)));
+
+ page = VARDATA(raw_page);
+
+ if (PageIsNew(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index table contains zero page")));
+
+ if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index table contains corrupted page")));
+
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ if (pageopaque->hasho_page_id != HASHO_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("page is not a hash page"),
+ errdetail("Expected %08x, got %08x.",
+ HASHO_PAGE_ID, pageopaque->hasho_page_id)));
+
+ /* Check that page type is sane. */
+ pagetype = pageopaque->hasho_flag & LH_PAGE_TYPE;
+ if (pagetype != LH_OVERFLOW_PAGE && pagetype != LH_BUCKET_PAGE &&
+ pagetype != LH_BITMAP_PAGE && pagetype != LH_META_PAGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid hash page type %08x", pagetype)));
+
+ /* If requested, verify page type. */
+ if (flags != 0 && (pagetype & flags) == 0)
+ {
+ switch (flags)
+ {
+ case LH_META_PAGE:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("page is not a hash meta page")));
+ case LH_BUCKET_PAGE | LH_OVERFLOW_PAGE:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("page is not a hash bucket or overflow page")));
+ case LH_OVERFLOW_PAGE:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("page is not a hash overflow page")));
+ default:
+ elog(ERROR,
+ "hash page of type %08x not in mask %08x",
+ pagetype, flags);
+ }
+ }
+
+ /*
+ * If it is the metapage, also verify magic number and version.
+ */
+ if (pagetype == LH_META_PAGE)
+ {
+ HashMetaPage metap = HashPageGetMeta(page);
+
+ if (metap->hashm_magic != HASH_MAGIC)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid magic number for metadata"),
+ errdetail("Expected 0x%08x, got 0x%08x.",
+ HASH_MAGIC, metap->hashm_magic)));
+
+ if (metap->hashm_version != HASH_VERSION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid version for metadata"),
+ errdetail("Expected %d, got %d",
+ HASH_VERSION, metap->hashm_version)));
+ }
+
+ return page;
+}
+
+/* -------------------------------------------------
+ * GetHashPageStatistics()
+ *
+ * Collect statistics of single hash page
+ * -------------------------------------------------
+ */
+static void
+GetHashPageStatistics(Page page, HashPageStat * stat)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+ HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ int off;
+
+ stat->dead_items = stat->live_items = 0;
+ stat->page_size = PageGetPageSize(page);
+
+ /* hash page opaque data */
+ stat->hasho_prevblkno = opaque->hasho_prevblkno;
+ stat->hasho_nextblkno = opaque->hasho_nextblkno;
+ stat->hasho_bucket = opaque->hasho_bucket;
+ stat->hasho_flag = opaque->hasho_flag;
+ stat->hasho_page_id = opaque->hasho_page_id;
+
+ /* count live and dead tuples, and free space */
+ for (off = FirstOffsetNumber; off <= maxoff; off++)
+ {
+ ItemId id = PageGetItemId(page, off);
+
+ if (!ItemIdIsDead(id))
+ stat->live_items++;
+ else
+ stat->dead_items++;
+ }
+ stat->free_size = PageGetFreeSpace(page);
+}
+
+/* ---------------------------------------------------
+ * hash_page_type()
+ *
+ * Usage: SELECT hash_page_type(get_raw_page('con_hash_index', 1));
+ * ---------------------------------------------------
+ */
+Datum
+hash_page_type(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ Page page;
+ HashPageOpaque opaque;
+ char *type;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use raw page functions"))));
+
+ page = verify_hash_page(raw_page, 0);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+ /* page type (flags) */
+ if (opaque->hasho_flag & LH_META_PAGE)
+ type = "metapage";
+ else if (opaque->hasho_flag & LH_OVERFLOW_PAGE)
+ type = "overflow";
+ else if (opaque->hasho_flag & LH_BUCKET_PAGE)
+ type = "bucket";
+ else if (opaque->hasho_flag & LH_BITMAP_PAGE)
+ type = "bitmap";
+ else
+ type = "unused";
+
+ PG_RETURN_TEXT_P(cstring_to_text(type));
+}
+
+/* ---------------------------------------------------
+ * hash_page_stats()
+ *
+ * Usage: SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
+ * ---------------------------------------------------
+ */
+Datum
+hash_page_stats(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ Page page;
+ int j;
+ Datum values[9];
+ bool nulls[9];
+ HashPageStat stat;
+ HeapTuple tuple;
+ TupleDesc tupleDesc;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use raw page functions"))));
+
+ page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+
+ /* keep compiler quiet */
+ stat.hasho_prevblkno = stat.hasho_nextblkno = InvalidBlockNumber;
+ stat.hasho_flag = stat.hasho_page_id = stat.free_size = 0;
+
+ GetHashPageStatistics(page, &stat);
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+ tupleDesc = BlessTupleDesc(tupleDesc);
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ j = 0;
+ values[j++] = UInt16GetDatum(stat.live_items);
+ values[j++] = UInt16GetDatum(stat.dead_items);
+ values[j++] = UInt16GetDatum(stat.page_size);
+ values[j++] = UInt16GetDatum(stat.free_size);
+ values[j++] = UInt32GetDatum(stat.hasho_prevblkno);
+ values[j++] = UInt32GetDatum(stat.hasho_nextblkno);
+ values[j++] = UInt32GetDatum(stat.hasho_bucket);
+ values[j++] = UInt16GetDatum(stat.hasho_flag);
+ values[j++] = UInt16GetDatum(stat.hasho_page_id);
+
+ tuple = heap_form_tuple(tupleDesc, values, nulls);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
+
+/*
+ * cross-call data structure for SRF
+ */
+struct user_args
+{
+ Page page;
+ OffsetNumber offset;
+};
+
+/*-------------------------------------------------------
+ * hash_page_items()
+ *
+ * Get IndexTupleData set in a hash page
+ *
+ * Usage: SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1));
+ *-------------------------------------------------------
+ */
+Datum
+hash_page_items(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ Page page;
+ Datum result;
+ Datum values[3];
+ bool nulls[3];
+ uint32 hashkey;
+ HeapTuple tuple;
+ FuncCallContext *fctx;
+ MemoryContext mctx;
+ struct user_args *uargs;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use raw page functions"))));
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupleDesc;
+
+ fctx = SRF_FIRSTCALL_INIT();
+
+ page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+
+ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+ uargs = palloc(sizeof(struct user_args));
+
+ uargs->page = page;
+
+ uargs->offset = FirstOffsetNumber;
+
+ fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+ tupleDesc = BlessTupleDesc(tupleDesc);
+
+ fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
+
+ fctx->user_fctx = uargs;
+
+ MemoryContextSwitchTo(mctx);
+ }
+
+ fctx = SRF_PERCALL_SETUP();
+ uargs = fctx->user_fctx;
+
+ if (fctx->call_cntr < fctx->max_calls)
+ {
+ ItemId id;
+ IndexTuple itup;
+ int j;
+
+ id = PageGetItemId(uargs->page, uargs->offset);
+
+ if (!ItemIdIsValid(id))
+ elog(ERROR, "invalid ItemId");
+
+ itup = (IndexTuple) PageGetItem(uargs->page, id);
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ j = 0;
+ values[j++] = UInt16GetDatum(uargs->offset);
+ values[j++] = PointerGetDatum(&itup->t_tid);
+
+ hashkey = _hash_get_indextuple_hashkey(itup);
+ values[j] = UInt32GetDatum(hashkey);
+
+ tuple = heap_form_tuple(fctx->attinmeta->tupdesc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+
+ uargs->offset = uargs->offset + 1;
+
+ SRF_RETURN_NEXT(fctx, result);
+ }
+ else
+ {
+ pfree(uargs);
+ SRF_RETURN_DONE(fctx);
+ }
+}
+
+/* ------------------------------------------------
+ * hash_bitmap_info()
+ *
+ * Get bitmap information for a particular overflow page
+ *
+ * Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5);
+ * ------------------------------------------------
+ */
+Datum
+hash_bitmap_info(PG_FUNCTION_ARGS)
+{
+ Oid indexRelid = PG_GETARG_OID(0);
+ uint32 ovflblkno = PG_GETARG_UINT32(1);
+ HashMetaPage metap;
+ Buffer buf,
+ metabuf;
+ BlockNumber bitmapblkno;
+ Page page;
+ bool bit = false;
+ HashPageOpaque opaque;
+ TupleDesc tupleDesc;
+ Relation indexRel;
+ uint32 ovflbitno;
+ int32 bitmappage,
+ bitmapbit;
+ HeapTuple tuple;
+ int j;
+ Datum values[3];
+ bool nulls[3];
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use raw page functions"))));
+
+ indexRel = index_open(indexRelid, AccessShareLock);
+
+ if (!IS_HASH(indexRel))
+ elog(ERROR, "relation \"%s\" is not a hash index",
+ RelationGetRelationName(indexRel));
+
+ if (RELATION_IS_OTHER_TEMP(indexRel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions")));
+
+ if (RelationGetNumberOfBlocks(indexRel) <= (BlockNumber) (ovflblkno))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("block number %u is out of range for relation \"%s\"",
+ ovflblkno, RelationGetRelationName(indexRel))));
+
+ buf = ReadBufferExtended(indexRel, MAIN_FORKNUM, ovflblkno, RBM_NORMAL, NULL);
+ LockBuffer(buf, BUFFER_LOCK_SHARE);
+ _hash_checkpage(indexRel, buf, LH_PAGE_TYPE);
+ page = BufferGetPage(buf);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+ if (opaque->hasho_flag != LH_OVERFLOW_PAGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("page is not an overflow page"),
+ errdetail("Expected %08x, got %08x.",
+ LH_OVERFLOW_PAGE, opaque->hasho_flag)));
+
+ if (BlockNumberIsValid(opaque->hasho_prevblkno))
+ bit = true;
+
+ UnlockReleaseBuffer(buf);
+
+ /* Read the metapage so we can determine which bitmap page to use */
+ metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+ metap = HashPageGetMeta(BufferGetPage(metabuf));
+
+ /* Identify overflow bit number */
+ ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
+
+ bitmappage = ovflbitno >> BMPG_SHIFT(metap);
+ bitmapbit = ovflbitno & BMPG_MASK(metap);
+
+ if (bitmappage >= metap->hashm_nmaps)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid overflow bit number %u", ovflbitno)));
+
+ bitmapblkno = metap->hashm_mapp[bitmappage];
+
+ _hash_relbuf(indexRel, metabuf);
+
+ index_close(indexRel, AccessShareLock);
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+ tupleDesc = BlessTupleDesc(tupleDesc);
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ j = 0;
+ values[j++] = UInt32GetDatum(bitmapblkno);
+ values[j++] = Int32GetDatum(bitmapbit);
+ values[j++] = BoolGetDatum(bit);
+
+ tuple = heap_form_tuple(tupleDesc, values, nulls);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
+
+/* ------------------------------------------------
+ * hash_metapage_info()
+ *
+ * Get the meta-page information for a hash index
+ *
+ * Usage: SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0))
+ * ------------------------------------------------
+ */
+Datum
+hash_metapage_info(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ Page page;
+ HashMetaPageData *metad;
+ TupleDesc tupleDesc;
+ HeapTuple tuple;
+ int i,
+ j;
+ Datum values[16];
+ bool nulls[16];
+ Datum spares[HASH_MAX_SPLITPOINTS];
+ Datum mapp[HASH_MAX_BITMAPS];
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use raw page functions"))));
+
+ page = verify_hash_page(raw_page, LH_META_PAGE);
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+ tupleDesc = BlessTupleDesc(tupleDesc);
+
+ metad = HashPageGetMeta(page);
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ j = 0;
+ values[j++] = UInt32GetDatum(metad->hashm_magic);
+ values[j++] = UInt32GetDatum(metad->hashm_version);
+ values[j++] = Float8GetDatum(metad->hashm_ntuples);
+ values[j++] = UInt16GetDatum(metad->hashm_ffactor);
+ values[j++] = UInt16GetDatum(metad->hashm_bsize);
+ values[j++] = UInt16GetDatum(metad->hashm_bmsize);
+ values[j++] = UInt16GetDatum(metad->hashm_bmshift);
+ values[j++] = UInt32GetDatum(metad->hashm_maxbucket);
+ values[j++] = UInt32GetDatum(metad->hashm_highmask);
+ values[j++] = UInt32GetDatum(metad->hashm_lowmask);
+ values[j++] = UInt32GetDatum(metad->hashm_ovflpoint);
+ values[j++] = UInt32GetDatum(metad->hashm_firstfree);
+ values[j++] = UInt32GetDatum(metad->hashm_nmaps);
+ values[j++] = UInt16GetDatum(metad->hashm_procid);
+
+ for (i = 0; i < HASH_MAX_SPLITPOINTS; i++)
+ spares[i] = UInt32GetDatum(metad->hashm_spares[i]);
+ values[j++] = PointerGetDatum(construct_array(spares,
+ HASH_MAX_SPLITPOINTS,
+ INT8OID,
+ 8, true, 'd'));
+
+ for (i = 0; i < HASH_MAX_BITMAPS; i++)
+ mapp[i] = UInt32GetDatum(metad->hashm_mapp[i]);
+ values[j++] = PointerGetDatum(construct_array(mapp,
+ HASH_MAX_BITMAPS,
+ INT8OID,
+ 8, true, 'd'));
+
+ tuple = heap_form_tuple(tupleDesc, values, nulls);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
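The overflow-bit arithmetic in hash_bitmap_info() above is easier to see with concrete numbers. With the default 8 kB block size, bmshift is 15 (cf. the metapage output in the expected test results: bmsize 4096 bytes = 32768 bits = 1 << 15 bits per bitmap page), so for a hypothetical bit number:

    uint32  ovflbitno  = 40000;                        /* from _hash_ovflblkno_to_bitno() */
    int32   bitmappage = ovflbitno >> 15;              /* = 1, the second bitmap page */
    int32   bitmapbit  = ovflbitno & ((1 << 15) - 1);  /* = 7232, bit within that page */
    /* metap->hashm_mapp[bitmappage] then yields the bitmap page's block number */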
diff --git a/contrib/pageinspect/pageinspect--1.5--1.6.sql b/contrib/pageinspect/pageinspect--1.5--1.6.sql
new file mode 100644
index 0000000000..d0355b4c7e
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.5--1.6.sql
@@ -0,0 +1,77 @@
+/* contrib/pageinspect/pageinspect--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.6'" to load this file. \quit
+
+--
+-- HASH functions
+--
+
+--
+-- hash_page_type()
+--
+CREATE FUNCTION hash_page_type(IN page bytea)
+RETURNS text
+AS 'MODULE_PATHNAME', 'hash_page_type'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- hash_page_stats()
+--
+CREATE FUNCTION hash_page_stats(IN page bytea,
+ OUT live_items smallint,
+ OUT dead_items smallint,
+ OUT page_size smallint,
+ OUT free_size smallint,
+ OUT hasho_prevblkno int8,
+ OUT hasho_nextblkno int8,
+ OUT hasho_bucket int8,
+ OUT hasho_flag smallint,
+ OUT hasho_page_id int4)
+AS 'MODULE_PATHNAME', 'hash_page_stats'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- hash_page_items()
+--
+CREATE FUNCTION hash_page_items(IN page bytea,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT data int8)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'hash_page_items'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- hash_bitmap_info()
+--
+CREATE FUNCTION hash_bitmap_info(IN index_oid regclass, IN blkno int8,
+ OUT bitmapblkno int8,
+ OUT bitmapbit int4,
+ OUT bitstatus bool)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'hash_bitmap_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- hash_metapage_info()
+--
+CREATE FUNCTION hash_metapage_info(IN page bytea,
+ OUT magic int8,
+ OUT version int8,
+ OUT ntuples double precision,
+ OUT ffactor int4,
+ OUT bsize int4,
+ OUT bmsize int4,
+ OUT bmshift int4,
+ OUT maxbucket int8,
+ OUT highmask int8,
+ OUT lowmask int8,
+ OUT ovflpoint int8,
+ OUT firstfree int8,
+ OUT nmaps int8,
+ OUT procid int4,
+ OUT spares int8[],
+ OUT mapp int8[])
+AS 'MODULE_PATHNAME', 'hash_metapage_info'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control
index 23c8eff9cd..1a61c9f5ad 100644
--- a/contrib/pageinspect/pageinspect.control
+++ b/contrib/pageinspect/pageinspect.control
@@ -1,5 +1,5 @@
# pageinspect extension
comment = 'inspect the contents of database pages at a low level'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/pageinspect'
relocatable = true
diff --git a/contrib/pageinspect/sql/hash.sql b/contrib/pageinspect/sql/hash.sql
new file mode 100644
index 0000000000..9e7635e36d
--- /dev/null
+++ b/contrib/pageinspect/sql/hash.sql
@@ -0,0 +1,49 @@
+CREATE TABLE test_hash (a int, b text);
+INSERT INTO test_hash VALUES (1, 'one');
+CREATE INDEX test_hash_a_idx ON test_hash USING hash (a);
+
+\x
+
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0));
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1));
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2));
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3));
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4));
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5));
+SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6));
+
+
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0);
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1);
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2);
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3);
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4);
+SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5);
+
+
+
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 1));
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 2));
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 3));
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 4));
+SELECT * FROM hash_metapage_info(get_raw_page('test_hash_a_idx', 5));
+
+
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 0));
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 1));
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 2));
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 3));
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 4));
+SELECT * FROM hash_page_stats(get_raw_page('test_hash_a_idx', 5));
+
+
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0));
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1));
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2));
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3));
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5));
+
+
+DROP TABLE test_hash;
diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml
index d12dbac32d..4c201e75b0 100644
--- a/doc/src/sgml/pageinspect.sgml
+++ b/doc/src/sgml/pageinspect.sgml
@@ -486,6 +486,150 @@ test=# SELECT first_tid, nbytes, tids[0:5] AS some_tids
(170,30) | 376 | {"(170,30)","(170,31)","(170,32)","(170,33)","(170,34)"}
(173,44) | 197 | {"(173,44)","(173,45)","(173,46)","(173,47)","(173,48)"}
(7 rows)
+
+
+
+
+
+
+
+
+ Hash Functions
+
+
+
+
+ hash_page_type(page bytea) returns text
+
+ hash_page_type
+
+
+
+
+
+ hash_page_type returns the page type of
+ the given HASH index page. For example:
+
+test=# SELECT hash_page_type(get_raw_page('con_hash_index', 0));
+ hash_page_type
+----------------
+ metapage
+
+
+
+
+
+
+
+ hash_page_stats(page bytea) returns setof record
+
+ hash_page_stats
+
+
+
+
+
+ hash_page_stats returns information about
+ a bucket or overflow page of a HASH index.
+ For example:
+
+test=# SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
+-[ RECORD 1 ]---+-----------
+live_items | 407
+dead_items | 0
+page_size | 8192
+free_size | 8
+hasho_prevblkno | 4294967295
+hasho_nextblkno | 8474
+hasho_bucket | 0
+hasho_flag | 66
+hasho_page_id | 65408
+
+
+
+
+
+
+
+ hash_page_items(page bytea) returns setof record
+
+ hash_page_items
+
+
+
+
+
+ hash_page_items returns information about
+ the data stored in a bucket or overflow page of a HASH
+ index. For example:
+
+test=# SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1)) LIMIT 5;
+ itemoffset | ctid | data
+------------+-----------+------------
+ 1 | (899,77) | 1053474816
+ 2 | (897,29) | 1053474816
+ 3 | (894,207) | 1053474816
+ 4 | (892,159) | 1053474816
+ 5 | (890,111) | 1053474816
+
+
+
+
+
+
+
+ hash_bitmap_info(index oid, blkno int) returns record
+
+ hash_bitmap_info
+
+
+
+
+
+ hash_bitmap_info shows the status of a bit
+ in the bitmap page for a particular overflow page of a
+ HASH index. For example:
+
+test=# SELECT * FROM hash_bitmap_info('con_hash_index', 2052);
+ bitmapblkno | bitmapbit | bitstatus
+-------------+-----------+-----------
+ 65 | 3 | t
+
+
+
+
+
+
+
+ hash_metapage_info(page bytea) returns record
+
+ hash_metapage_info
+
+
+
+
+
+ hash_metapage_info returns information stored
+ in the meta page of a HASH index. For example:
+
+test=# SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0));
+-[ RECORD 1 ]-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+magic | 105121344
+version | 2
+ntuples | 500500
+ffactor | 40
+bsize | 8152
+bmsize | 4096
+bmshift | 15
+maxbucket | 12512
+highmask | 16383
+lowmask | 8191
+ovflpoint | 14
+firstfree | 1204
+nmaps | 1
+procid | 450
+spares | {0,0,0,0,0,0,1,1,1,1,1,4,59,704,1204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+mapp | {65,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index e8928efc1a..753c8a6a13 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -52,10 +52,12 @@ bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
}
/*
+ * _hash_ovflblkno_to_bitno
+ *
* Convert overflow page block number to bit number for free-page bitmap.
*/
-static uint32
-blkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
+uint32
+_hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
{
uint32 splitnum = metap->hashm_ovflpoint;
uint32 i;
@@ -485,7 +487,7 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf, Buffer wbuf,
metap = HashPageGetMeta(BufferGetPage(metabuf));
/* Identify which bit to set */
- ovflbitno = blkno_to_bitno(metap, ovflblkno);
+ ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
bitmappage = ovflbitno >> BMPG_SHIFT(metap);
bitmapbit = ovflbitno & BMPG_MASK(metap);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 69a3873fac..1a9b91f9f5 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -58,6 +58,9 @@ typedef uint32 Bucket;
#define LH_BUCKET_BEING_SPLIT (1 << 5)
#define LH_BUCKET_NEEDS_SPLIT_CLEANUP (1 << 6)
+#define LH_PAGE_TYPE \
+ (LH_OVERFLOW_PAGE|LH_BUCKET_PAGE|LH_BITMAP_PAGE|LH_META_PAGE)
+
typedef struct HashPageOpaqueData
{
BlockNumber hasho_prevblkno; /* previous ovfl (or bucket) blkno */
@@ -299,6 +302,7 @@ extern void _hash_squeezebucket(Relation rel,
Bucket bucket, BlockNumber bucket_blkno,
Buffer bucket_buf,
BufferAccessStrategy bstrategy);
+extern uint32 _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno);
/* hashpage.c */
extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
--
cgit v1.2.3
From ecb814b5cef4971e50ee177eff320d416aa0b882 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 2 Feb 2017 16:04:59 -0500
Subject: doc: Document result set of CREATE_REPLICATION_SLOT
From: Marko Tiikkaja
---
doc/src/sgml/protocol.sgml | 49 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 9f054added..2e84a29901 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1510,6 +1510,55 @@ The commands accepted in walsender mode are:
+
+
+ In response to this command, the server will send a one-row result set
+ containing the following fields:
+
+
+
+ slot_name (text)
+
+
+ The name of the newly-created replication slot.
+
+
+
+
+
+ consistent_point (text)
+
+
+ The WAL position at which the slot became consistent. This is the
+ earliest location from which streaming can start on this replication
+ slot.
+
+
+
+
+
+ snapshot_name (text)
+
+
+ The identifier of the snapshot exported by the command. The
+ snapshot is valid until a new command is executed on this connection
+ or the replication connection is closed. Null if the created slot
+ is physical.
+
+
+
+
+
+ output_plugin (text)
+
+
+ The name of the output plugin used by the newly-created replication
+ slot. Null if the created slot is physical.
+
+
+
+
+
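For illustration, one way a client might issue this command and read the result set, sketched with libpq (the connection string, slot name, and plugin name here are invented, not taken from the patch):

    #include <stdio.h>
    #include "libpq-fe.h"

    int
    main(void)
    {
        PGconn     *conn = PQconnectdb("dbname=postgres replication=database");
        PGresult   *res;

        if (PQstatus(conn) != CONNECTION_OK)
            return 1;

        res = PQexec(conn, "CREATE_REPLICATION_SLOT \"slot1\" LOGICAL \"test_decoding\"");
        if (PQresultStatus(res) == PGRES_TUPLES_OK && PQntuples(res) == 1)
        {
            printf("slot_name: %s\n", PQgetvalue(res, 0, 0));
            printf("consistent_point: %s\n", PQgetvalue(res, 0, 1));
            /* snapshot_name and output_plugin are null for physical slots */
            if (!PQgetisnull(res, 0, 2))
                printf("snapshot_name: %s\n", PQgetvalue(res, 0, 2));
            if (!PQgetisnull(res, 0, 3))
                printf("output_plugin: %s\n", PQgetvalue(res, 0, 3));
        }
        PQclear(res);
        PQfinish(conn);
        return 0;
    }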
--
cgit v1.2.3
From aa09b9dcd57cbf18516332bf8310c12ef6f83715 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 2 Feb 2017 16:49:46 -0500
Subject: doc: Add missing include in example code
It's not broken because the header file is included via other headers,
but for better style we should be more explicit.
Reported-by: mthrockmorton@hme.com
---
doc/src/sgml/trigger.sgml | 1 +
1 file changed, 1 insertion(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml
index 22966c06dc..8f724c8664 100644
--- a/doc/src/sgml/trigger.sgml
+++ b/doc/src/sgml/trigger.sgml
@@ -705,6 +705,7 @@ CREATE TABLE ttest (
This is the source code of the trigger function:
-
- FETCH_COUNT is ignored if it is unset or does not
- have a positive value. It cannot be set to a value that is not
- syntactically an integer.
-
-
Although you can use any output format with this feature,
@@ -3316,10 +3310,8 @@ bar
HISTSIZE
- The maximum number of commands to store in the command history.
- If unset, at most 500 commands are stored by default.
- If set to a value that is negative or not an integer, no limit is
- applied.
+ The maximum number of commands to store in the command history
+ (default 500). If set to a negative value, no limit is applied.
@@ -3345,13 +3337,13 @@ bar
IGNOREEOF
- If unset, sending an EOF> character (usually
+ If set to 1 or less, sending an EOF> character (usually
Control>D>>)
to an interactive session of psql
- will terminate the application. If set to a numeric value,
- that many EOF> characters are ignored before the
- application terminates. If the variable is set but not to a
- numeric value, the default is 10.
+ will terminate the application. If set to a larger numeric value,
+ that many consecutive EOF> characters must be typed to
+ make an interactive session terminate. If the variable is set to a
+ non-numeric value, it is interpreted as 10.
diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c
index 53656294da..3e3cab4941 100644
--- a/src/bin/psql/help.c
+++ b/src/bin/psql/help.c
@@ -348,9 +348,9 @@ helpVariables(unsigned short int pager)
" (default: 0=unlimited)\n"));
fprintf(output, _(" HISTCONTROL controls command history [ignorespace, ignoredups, ignoreboth]\n"));
fprintf(output, _(" HISTFILE file name used to store the command history\n"));
- fprintf(output, _(" HISTSIZE the number of commands to store in the command history\n"));
+ fprintf(output, _(" HISTSIZE max number of commands to store in the command history\n"));
fprintf(output, _(" HOST the currently connected database server host\n"));
- fprintf(output, _(" IGNOREEOF if unset, sending an EOF to interactive session terminates application\n"));
+ fprintf(output, _(" IGNOREEOF number of EOFs needed to terminate an interactive session\n"));
fprintf(output, _(" LASTOID value of the last affected OID\n"));
fprintf(output, _(" ON_ERROR_ROLLBACK if set, an error doesn't stop a transaction (uses implicit savepoints)\n"));
fprintf(output, _(" ON_ERROR_STOP stop batch execution after error\n"));
diff --git a/src/bin/psql/input.c b/src/bin/psql/input.c
index 3e3e97ad0d..b8c9a00b09 100644
--- a/src/bin/psql/input.c
+++ b/src/bin/psql/input.c
@@ -539,10 +539,7 @@ finishInput(void)
#ifdef USE_READLINE
if (useHistory && psql_history)
{
- int hist_size;
-
- hist_size = GetVariableNum(pset.vars, "HISTSIZE", 500, -1);
- (void) saveHistory(psql_history, hist_size);
+ (void) saveHistory(psql_history, pset.histsize);
free(psql_history);
psql_history = NULL;
}
diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c
index dc25b4babc..6e358e2e1b 100644
--- a/src/bin/psql/mainloop.c
+++ b/src/bin/psql/mainloop.c
@@ -162,7 +162,7 @@ MainLoop(FILE *source)
/* This tries to mimic bash's IGNOREEOF feature. */
count_eof++;
- if (count_eof < GetVariableNum(pset.vars, "IGNOREEOF", 0, 10))
+ if (count_eof < pset.ignoreeof)
{
if (!pset.quiet)
printf(_("Use \"\\q\" to leave %s.\n"), pset.progname);
diff --git a/src/bin/psql/settings.h b/src/bin/psql/settings.h
index 4c7c3b1fa3..195f5a1184 100644
--- a/src/bin/psql/settings.h
+++ b/src/bin/psql/settings.h
@@ -125,6 +125,8 @@ typedef struct _psqlSettings
bool singleline;
bool singlestep;
int fetch_count;
+ int histsize;
+ int ignoreeof;
PSQL_ECHO echo;
PSQL_ECHO_HIDDEN echo_hidden;
PSQL_ERROR_ROLLBACK on_error_rollback;
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index a3654e6272..88d686a5b7 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -774,6 +774,11 @@ showVersion(void)
* Substitute hooks and assign hooks for psql variables.
*
* This isn't an amazingly good place for them, but neither is anywhere else.
+ *
+ * By policy, every special variable that controls any psql behavior should
+ * have one or both hooks, even if they're just no-ops. This ensures that
+ * the variable will remain present in variables.c's list even when unset,
+ * which ensures that it's known to tab completion.
*/
static char *
@@ -823,16 +828,71 @@ singlestep_hook(const char *newval)
return ParseVariableBool(newval, "SINGLESTEP", &pset.singlestep);
}
+static char *
+fetch_count_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("0");
+ return newval;
+}
+
static bool
fetch_count_hook(const char *newval)
{
- if (newval == NULL)
- pset.fetch_count = -1; /* default value */
- else if (!ParseVariableNum(newval, "FETCH_COUNT", &pset.fetch_count))
- return false;
+ return ParseVariableNum(newval, "FETCH_COUNT", &pset.fetch_count);
+}
+
+static bool
+histfile_hook(const char *newval)
+{
+ /*
+ * Someday we might try to validate the filename, but for now, this is
+ * just a placeholder to ensure HISTFILE is known to tab completion.
+ */
return true;
}
+static char *
+histsize_substitute_hook(char *newval)
+{
+ if (newval == NULL)
+ newval = pg_strdup("500");
+ return newval;
+}
+
+static bool
+histsize_hook(const char *newval)
+{
+ return ParseVariableNum(newval, "HISTSIZE", &pset.histsize);
+}
+
+static char *
+ignoreeof_substitute_hook(char *newval)
+{
+ int dummy;
+
+ /*
+ * This tries to mimic the behavior of bash, to wit "If set, the value is
+ * the number of consecutive EOF characters which must be typed as the
+ * first characters on an input line before bash exits. If the variable
+ * exists but does not have a numeric value, or has no value, the default
+ * value is 10. If it does not exist, EOF signifies the end of input to
+ * the shell." Unlike bash, however, we insist on the stored value
+ * actually being a valid integer.
+ */
+ if (newval == NULL)
+ newval = pg_strdup("0");
+ else if (!ParseVariableNum(newval, NULL, &dummy))
+ newval = pg_strdup("10");
+ return newval;
+}
+
+static bool
+ignoreeof_hook(const char *newval)
+{
+ return ParseVariableNum(newval, "IGNOREEOF", &pset.ignoreeof);
+}
+
static char *
echo_substitute_hook(char *newval)
{
@@ -1062,8 +1122,17 @@ EstablishVariableSpace(void)
bool_substitute_hook,
singlestep_hook);
SetVariableHooks(pset.vars, "FETCH_COUNT",
- NULL,
+ fetch_count_substitute_hook,
fetch_count_hook);
+ SetVariableHooks(pset.vars, "HISTFILE",
+ NULL,
+ histfile_hook);
+ SetVariableHooks(pset.vars, "HISTSIZE",
+ histsize_substitute_hook,
+ histsize_hook);
+ SetVariableHooks(pset.vars, "IGNOREEOF",
+ ignoreeof_substitute_hook,
+ ignoreeof_hook);
SetVariableHooks(pset.vars, "ECHO",
echo_substitute_hook,
echo_hook);
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index d6fffcf42f..6e759d0b76 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -3775,8 +3775,9 @@ append_variable_names(char ***varnames, int *nvars,
/*
* This function supports completion with the name of a psql variable.
* The variable names can be prefixed and suffixed with additional text
- * to support quoting usages. If need_value is true, only the variables
- * that have the set values are picked up.
+ * to support quoting usages. If need_value is true, only variables
+ * that are currently set are included; otherwise, special variables
+ * (those that have hooks) are included even if currently unset.
*/
static char **
complete_from_variables(const char *text, const char *prefix, const char *suffix,
@@ -3789,33 +3790,12 @@ complete_from_variables(const char *text, const char *prefix, const char *suffix
int i;
struct _variable *ptr;
- static const char *const known_varnames[] = {
- "AUTOCOMMIT", "COMP_KEYWORD_CASE", "DBNAME", "ECHO", "ECHO_HIDDEN",
- "ENCODING", "FETCH_COUNT", "HISTCONTROL", "HISTFILE", "HISTSIZE",
- "HOST", "IGNOREEOF", "LASTOID", "ON_ERROR_ROLLBACK", "ON_ERROR_STOP",
- "PORT", "PROMPT1", "PROMPT2", "PROMPT3", "QUIET",
- "SHOW_CONTEXT", "SINGLELINE", "SINGLESTEP",
- "USER", "VERBOSITY", NULL
- };
-
varnames = (char **) pg_malloc((maxvars + 1) * sizeof(char *));
- if (!need_value)
- {
- for (i = 0; known_varnames[i] && nvars < maxvars; i++)
- append_variable_names(&varnames, &nvars, &maxvars,
- known_varnames[i], prefix, suffix);
- }
-
for (ptr = pset.vars->next; ptr; ptr = ptr->next)
{
if (need_value && !(ptr->value))
continue;
- for (i = 0; known_varnames[i]; i++) /* remove duplicate entry */
- {
- if (strcmp(ptr->name, known_varnames[i]) == 0)
- continue;
- }
append_variable_names(&varnames, &nvars, &maxvars, ptr->name,
prefix, suffix);
}
diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c
index 9ca100095f..d9d07631a5 100644
--- a/src/bin/psql/variables.c
+++ b/src/bin/psql/variables.c
@@ -179,31 +179,6 @@ ParseVariableNum(const char *value, const char *name, int *result)
}
}
-/*
- * Read integer value of the numeric variable named "name".
- *
- * Return defaultval if it is not set, or faultval if its value is not a
- * valid integer. (No error message is issued.)
- */
-int
-GetVariableNum(VariableSpace space,
- const char *name,
- int defaultval,
- int faultval)
-{
- const char *val;
- int result;
-
- val = GetVariable(space, name);
- if (!val)
- return defaultval;
-
- if (ParseVariableNum(val, NULL, &result))
- return result;
- else
- return faultval;
-}
-
/*
* Print values of all variables.
*/
diff --git a/src/bin/psql/variables.h b/src/bin/psql/variables.h
index 84be780509..19257937c7 100644
--- a/src/bin/psql/variables.h
+++ b/src/bin/psql/variables.h
@@ -81,11 +81,6 @@ bool ParseVariableBool(const char *value, const char *name,
bool ParseVariableNum(const char *value, const char *name,
int *result);
-int GetVariableNum(VariableSpace space,
- const char *name,
- int defaultval,
- int faultval);
-
void PrintVariables(VariableSpace space);
bool SetVariable(VariableSpace space, const char *name, const char *value);
--
cgit v1.2.3
From e759854a09d49725a9519c48a0d71a32bab05a01 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Fri, 3 Feb 2017 14:35:25 -0500
Subject: pgstattuple: Add pgstathashindex.
Since pgstattuple v1.5 hasn't been released yet, no need for a new
extension version. The new function exposes statistics about hash
indexes similar to what other pgstatindex functions return for other
index types.
Ashutosh Sharma, reviewed by Kuntal Ghosh. Substantial further
revisions by me.
---
contrib/pgstattuple/expected/pgstattuple.out | 8 ++
contrib/pgstattuple/pgstatindex.c | 195 ++++++++++++++++++++++++++
contrib/pgstattuple/pgstattuple--1.4--1.5.sql | 16 +++
contrib/pgstattuple/sql/pgstattuple.sql | 4 +
doc/src/sgml/pgstattuple.sgml | 95 +++++++++++++
5 files changed, 318 insertions(+)
(limited to 'doc/src')
diff --git a/contrib/pgstattuple/expected/pgstattuple.out b/contrib/pgstattuple/expected/pgstattuple.out
index e920234488..169d1932b2 100644
--- a/contrib/pgstattuple/expected/pgstattuple.out
+++ b/contrib/pgstattuple/expected/pgstattuple.out
@@ -130,3 +130,11 @@ select * from pgstatginindex('test_ginidx');
2 | 0 | 0
(1 row)
+create index test_hashidx on test using hash (b);
+WARNING: hash indexes are not WAL-logged and their use is discouraged
+select * from pgstathashindex('test_hashidx');
+ version | bucket_pages | overflow_pages | bitmap_pages | zero_pages | live_items | dead_items | free_percent
+---------+--------------+----------------+--------------+------------+------------+------------+--------------
+ 2 | 4 | 0 | 1 | 0 | 0 | 0 | 100
+(1 row)
+
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index b40669250a..17a53e3bb7 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -29,6 +29,7 @@
#include "access/gin_private.h"
#include "access/heapam.h"
+#include "access/hash.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/namespace.h"
@@ -36,6 +37,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/rel.h"
#include "utils/varlena.h"
@@ -54,6 +56,7 @@ PG_FUNCTION_INFO_V1(pgstatindexbyid);
PG_FUNCTION_INFO_V1(pg_relpages);
PG_FUNCTION_INFO_V1(pg_relpagesbyid);
PG_FUNCTION_INFO_V1(pgstatginindex);
+PG_FUNCTION_INFO_V1(pgstathashindex);
PG_FUNCTION_INFO_V1(pgstatindex_v1_5);
PG_FUNCTION_INFO_V1(pgstatindexbyid_v1_5);
@@ -66,6 +69,7 @@ Datum pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo);
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
#define IS_GIN(r) ((r)->rd_rel->relam == GIN_AM_OID)
+#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
/* ------------------------------------------------
* A structure for a whole btree index statistics
@@ -102,7 +106,29 @@ typedef struct GinIndexStat
int64 pending_tuples;
} GinIndexStat;
+/* ------------------------------------------------
+ * A structure for whole-index statistics of a HASH index,
+ * used by pgstathashindex().
+ * ------------------------------------------------
+ */
+typedef struct HashIndexStat
+{
+ int32 version;
+ int32 space_per_page;
+
+ BlockNumber bucket_pages;
+ BlockNumber overflow_pages;
+ BlockNumber bitmap_pages;
+ BlockNumber zero_pages;
+
+ int64 live_items;
+ int64 dead_items;
+ uint64 free_space;
+} HashIndexStat;
+
static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo);
+static void GetHashPageStats(Page page, HashIndexStat *stats);
+
/* ------------------------------------------------------
* pgstatindex()
@@ -528,3 +554,172 @@ pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo)
return (result);
}
+
+/* ------------------------------------------------------
+ * pgstathashindex()
+ *
+ * Usage: SELECT * FROM pgstathashindex('hashindex');
+ * ------------------------------------------------------
+ */
+Datum
+pgstathashindex(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ BlockNumber nblocks;
+ BlockNumber blkno;
+ Relation rel;
+ HashIndexStat stats;
+ BufferAccessStrategy bstrategy;
+ HeapTuple tuple;
+ TupleDesc tupleDesc;
+ Datum values[8];
+ bool nulls[8];
+ Buffer metabuf;
+ HashMetaPage metap;
+ float8 free_percent;
+ uint64 total_space;
+
+ rel = index_open(relid, AccessShareLock);
+
+ if (!IS_HASH(rel))
+ elog(ERROR, "relation \"%s\" is not a HASH index",
+ RelationGetRelationName(rel));
+
+ /*
+ * Reject attempts to read non-local temporary relations; we would be
+ * likely to get wrong data since we have no visibility into the owning
+ * session's local buffers.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary indexes of other sessions")));
+
+ /* Get the information we need from the metapage. */
+ memset(&stats, 0, sizeof(stats));
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+ metap = HashPageGetMeta(BufferGetPage(metabuf));
+ stats.version = metap->hashm_version;
+ stats.space_per_page = metap->hashm_bsize;
+ _hash_relbuf(rel, metabuf);
+
+ /* Get the current relation length */
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ /* prepare access strategy for this index */
+ bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+ /* Start from blkno 1 as 0th block is metapage */
+ for (blkno = 1; blkno < nblocks; blkno++)
+ {
+ Buffer buf;
+ Page page;
+ HashPageOpaque opaque;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+ bstrategy);
+ LockBuffer(buf, BUFFER_LOCK_SHARE);
+ page = (Page) BufferGetPage(buf);
+
+ if (PageIsNew(page))
+ stats.zero_pages++;
+ else if (PageGetSpecialSize(page) !=
+ MAXALIGN(sizeof(HashPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" contains corrupted page at block %u",
+ RelationGetRelationName(rel),
+ BufferGetBlockNumber(buf))));
+ else
+ {
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ if (opaque->hasho_flag & LH_BUCKET_PAGE)
+ {
+ stats.bucket_pages++;
+ GetHashPageStats(page, &stats);
+ }
+ else if (opaque->hasho_flag & LH_OVERFLOW_PAGE)
+ {
+ stats.overflow_pages++;
+ GetHashPageStats(page, &stats);
+ }
+ else if (opaque->hasho_flag & LH_BITMAP_PAGE)
+ stats.bitmap_pages++;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("unexpected page type 0x%04X in HASH index \"%s\" block %u",
+ opaque->hasho_flag, RelationGetRelationName(rel),
+ BufferGetBlockNumber(buf))));
+ }
+ UnlockReleaseBuffer(buf);
+ }
+
+ /* Done accessing the index */
+ index_close(rel, AccessShareLock);
+
+ /* Count zero pages as free space. */
+ stats.free_space += stats.zero_pages * stats.space_per_page;
+
+ /*
+ * Total space available for tuples excludes the metapage and the bitmap
+ * pages.
+ */
+ total_space = (nblocks - (stats.bitmap_pages + 1)) * stats.space_per_page;
+
+ if (total_space == 0)
+ free_percent = 0.0;
+ else
+ free_percent = 100.0 * stats.free_space / total_space;
+
+ /*
+ * Build a tuple descriptor for our result type
+ */
+ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ tupleDesc = BlessTupleDesc(tupleDesc);
+
+ /*
+ * Build and return the tuple
+ */
+ MemSet(nulls, 0, sizeof(nulls));
+ values[0] = Int32GetDatum(stats.version);
+ values[1] = Int64GetDatum((int64) stats.bucket_pages);
+ values[2] = Int64GetDatum((int64) stats.overflow_pages);
+ values[3] = Int64GetDatum((int64) stats.bitmap_pages);
+ values[4] = Int64GetDatum((int64) stats.zero_pages);
+ values[5] = Int64GetDatum(stats.live_items);
+ values[6] = Int64GetDatum(stats.dead_items);
+ values[7] = Float8GetDatum(free_percent);
+ tuple = heap_form_tuple(tupleDesc, values, nulls);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
+
+/* -------------------------------------------------
+ * GetHashPageStats()
+ *
+ * Collect statistics of a single hash page
+ * -------------------------------------------------
+ */
+static void
+GetHashPageStats(Page page, HashIndexStat *stats)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+ int off;
+
+ /* count live and dead tuples, and free space */
+ for (off = FirstOffsetNumber; off <= maxoff; off++)
+ {
+ ItemId id = PageGetItemId(page, off);
+
+ if (!ItemIdIsDead(id))
+ stats->live_items++;
+ else
+ stats->dead_items++;
+ }
+ stats->free_space += PageGetExactFreeSpace(page);
+}
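A quick sanity check of the new function from SQL; a minimal sketch, assuming a
scratch table t with an integer column k (both names hypothetical):

    create table t (k int);
    create index t_hashidx on t using hash (k);
    insert into t select generate_series(1, 100000);
    select bucket_pages, overflow_pages, zero_pages, live_items, free_percent
    from pgstathashindex('t_hashidx');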
diff --git a/contrib/pgstattuple/pgstattuple--1.4--1.5.sql b/contrib/pgstattuple/pgstattuple--1.4--1.5.sql
index 65d7f19c2a..84e112e1c2 100644
--- a/contrib/pgstattuple/pgstattuple--1.4--1.5.sql
+++ b/contrib/pgstattuple/pgstattuple--1.4--1.5.sql
@@ -109,3 +109,19 @@ AS 'MODULE_PATHNAME', 'pgstattuple_approx_v1_5'
LANGUAGE C STRICT PARALLEL SAFE;
REVOKE EXECUTE ON FUNCTION pgstattuple_approx(regclass) FROM PUBLIC;
+
+/* New stuff in 1.5 begins here */
+
+CREATE OR REPLACE FUNCTION pgstathashindex(IN relname regclass,
+ OUT version INTEGER,
+ OUT bucket_pages BIGINT,
+ OUT overflow_pages BIGINT,
+ OUT bitmap_pages BIGINT,
+ OUT zero_pages BIGINT,
+ OUT live_items BIGINT,
+ OUT dead_items BIGINT,
+ OUT free_percent FLOAT8)
+AS 'MODULE_PATHNAME', 'pgstathashindex'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstathashindex(regclass) FROM PUBLIC;
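Since EXECUTE is revoked from PUBLIC above, access has to be granted back
explicitly where wanted; for example, to a hypothetical monitoring role:

    GRANT EXECUTE ON FUNCTION pgstathashindex(regclass) TO monitoring;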
diff --git a/contrib/pgstattuple/sql/pgstattuple.sql b/contrib/pgstattuple/sql/pgstattuple.sql
index d22c9f1c46..81fd5d693b 100644
--- a/contrib/pgstattuple/sql/pgstattuple.sql
+++ b/contrib/pgstattuple/sql/pgstattuple.sql
@@ -47,3 +47,7 @@ select pg_relpages(relname) from pg_class where relname = 'test_pkey';
create index test_ginidx on test using gin (b);
select * from pgstatginindex('test_ginidx');
+
+create index test_hashidx on test using hash (b);
+
+select * from pgstathashindex('test_hashidx');
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index d2fa524d6e..62b1a6f479 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -352,6 +352,101 @@ pending_tuples | 0
+
+
+
+ pgstathashindex
+
+ pgstathashindex(regclass) returns record>
+
+
+
+
+ pgstathashindex returns a record showing information
+ about a HASH index. For example:
+
+test=> select * from pgstathashindex('con_hash_index');
+-[ RECORD 1 ]--+-----------------
+version | 2
+bucket_pages | 33081
+overflow_pages | 0
+bitmap_pages | 1
+zero_pages | 32455
+live_items | 10204006
+dead_items | 0
+free_percent | 61.8005949100872
+
+
+
+
+ The output columns are:
+
+
+
+
+
+ Column
+ Type
+ Description
+
+
+
+
+
+ version
+ integer
+ HASH version number
+
+
+
+ bucket_pages
+ bigint
+ Number of bucket pages
+
+
+
+ overflow_pages
+ bigint
+ Number of overflow pages
+
+
+
+ bitmap_pages
+ bigint
+ Number of bitmap pages
+
+
+
+ zero_pages
+ bigint
+ Number of new or zero pages
+
+
+
+ live_items
+ bigint
+ Number of live tuples
+
+
+
+ dead_items
+ bigint
+ Number of dead tuples
+
+
+
+ free_percent
+ float
+ Percentage of free space
+
+
+
+
+
+
+
+
+
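The page-count columns plus the metapage should add up to the index's physical
size; a hedged cross-check, reusing the test_hashidx index from the regression
test above:

    select bucket_pages + overflow_pages + bitmap_pages + zero_pages + 1
             as counted_pages,   -- +1 for the metapage, which is not reported
           pg_relation_size('test_hashidx')
             / current_setting('block_size')::int as actual_pages
    from pgstathashindex('test_hashidx');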
--
cgit v1.2.3
From 9863017b87f3592ff663d03fc663a4f1f8fdb8b2 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sat, 4 Feb 2017 12:51:25 -0500
Subject: First-draft release notes for 9.6.2.
As usual, the release notes for other branches will be made by cutting
these down, but put them up for community review first.
---
doc/src/sgml/release-9.6.sgml | 1441 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 1441 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 1ee136ab96..97d1f51515 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -1,6 +1,1447 @@
+
+ Release 9.6.2
+
+
+ Release Date
+ 2017-02-09
+
+
+
+ This release contains a variety of fixes from 9.6.1.
+ For information about new features in the 9.6 major release, see
+ .
+
+
+
+ Migration to Version 9.6.2
+
+
+ A dump/restore is not required for those running 9.6.X.
+
+
+
+ However, if you are upgrading from a version earlier than 9.6.1,
+ see .
+
+
+
+
+ Changes
+
+
+
+
+
+
+ Ensure that the special snapshot used for catalog scans is not
+ invalidated by premature data pruning (Tom Lane)
+
+
+
+ Backends failed to account for this snapshot when advertising their
+ oldest xmin, potentially allowing concurrent vacuuming operations to
+ remove data that was still needed. This led to transient failures
+ along the lines of cache lookup failed for relation 1255>.
+
+
+
+
+
+
+ Fix incorrect WAL logging for BRIN indexes (Kuntal Ghosh)
+
+
+
+ The WAL record emitted for a BRIN revmap> page when moving an
+ index tuple to a different page was incorrect. Replay would make the
+ related portion of the index useless, forcing it to be recomputed.
+
+
+
+
+
+
+ Unconditionally WAL-log creation of the init fork> for an
+ unlogged table (Michael Paquier)
+
+
+
+ Previously, this was skipped when
+ = minimal>, but actually it's necessary even in that case
+ to ensure that the unlogged table is properly reset to empty after a
+ crash.
+
+
+
+
+
+
+ Fix WAL page header validation when re-reading segments (Takayuki
+ Tsunakawa, Amit Kapila)
+
+
+
+ In corner cases, a spurious out-of-sequence TLI> error
+ could be reported during recovery.
+
+
+
+
+
+
+ Reduce interlocking on standby servers during the replay of btree
+ index vacuuming operations (Simon Riggs)
+
+
+
+ This change avoids substantial replication delays that sometimes
+ occurred while replaying such operations.
+
+
+
+
+
+
+ If the stats collector dies during hot standby, restart it (Takayuki
+ Tsunakawa)
+
+
+
+
+
+
+ Ensure that hot standby feedback works correctly when it's enabled at
+ standby server start (Ants Aasma, Craig Ringer)
+
+
+
+
+
+
+ Check for interrupts while hot standby is waiting for a conflicting
+ query (Simon Riggs)
+
+
+
+
+
+
+ Avoid constantly respawning the autovacuum launcher in a corner case
+ (Amit Khandekar)
+
+
+
+ This fix avoids problems when autovacuum is nominally off and there
+ are some tables that require freezing, but all such tables are
+ already being processed by autovacuum workers.
+
+
+
+
+
+
+ Disallow setting the num_sync> field to zero in
+ (Fujii Masao)
+
+
+
+ The correct way to disable synchronous standby is to set the whole
+ value to an empty string.
+
+
+
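As an illustration of the entry above, synchronous replication is disabled by
emptying the whole setting rather than by writing a zero count (standby names
here are hypothetical):

    ALTER SYSTEM SET synchronous_standby_names = '';   -- correct way to disable
    SELECT pg_reload_conf();
    -- rejected after this fix:
    -- ALTER SYSTEM SET synchronous_standby_names = '0 (s1, s2)';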
+
+
+
+ Don't count background worker processes against a user's connection
+ limit (David Rowley)
+
+
+
+
+
+
+ Fix check for when an extension member object can be dropped (Tom Lane)
+
+
+
+ Extension upgrade scripts should be able to drop member objects,
+ but this was disallowed for serial-column sequences, and possibly
+ other cases.
+
+
+
+
+
+
+ Fix tracking of initial privileges for extension member objects so
+ that it works correctly with ALTER EXTENSION ... ADD/DROP>
+ (Stephen Frost)
+
+
+
+ An object's current privileges at the time it is added to the
+ extension will now be considered its default privileges; only
+ later changes in its privileges will be dumped by
+ subsequent pg_dump> runs.
+
+
+
+
+
+
+ Make sure ALTER TABLE> preserves index tablespace
+ assignments when rebuilding indexes (Tom Lane, Michael Paquier)
+
+
+
+ Previously, non-default settings
+ of could result in broken
+ indexes.
+
+
+
+
+
+
+ Fix incorrect updating of trigger function properties when changing a
+ foreign-key constraint's deferrability properties with ALTER
+ TABLE ... ALTER CONSTRAINT> (Tom Lane)
+
+
+
+ This led to odd failures during subsequent exercise of the foreign
+ key, as the triggers were fired at the wrong times.
+
+
+
+
+
+
+ Prevent dropping a foreign-key constraint if there are pending
+ trigger events for the referenced relation (Tom Lane)
+
+
+
+ This avoids could not find trigger NNN>
+ or relation NNN> has no triggers errors.
+
+
+
+
+
+
+ Fix ALTER TABLE ... SET DATA TYPE ... USING> when child
+ table has different column ordering than the parent
+ (Álvaro Herrera)
+
+
+
+ Failure to adjust the column numbering in the USING>
+ expression led to errors,
+ typically attribute N> has wrong type.
+
+
+
+
+
+
+ Fix processing of OID column when a table with OIDs is associated to
+ a parent with OIDs via ALTER TABLE ... INHERIT> (Amit
+ Langote)
+
+
+
+ The OID column should be treated the same as regular user columns in
+ this case, but it wasn't, leading to odd behavior in later
+ inheritance changes.
+
+
+
+
+
+
+ Ensure that CREATE TABLE ... LIKE ... WITH OIDS> creates
+ a table with OIDs, whether or not the LIKE>-referenced
+ table(s) have OIDs (Tom Lane)
+
+
+
+
+
+
+ Fix CREATE OR REPLACE VIEW> to update the view query
+ before attempting to apply the new view options (Dean Rasheed)
+
+
+
+ Previously the command would fail if the new options were
+ inconsistent with the old view definition.
+
+
+
+
+
+
+ Report correct object identity during ALTER TEXT SEARCH
+ CONFIGURATION> (Artur Zakirov)
+
+
+
+ The wrong catalog OID was reported to extensions such as logical
+ decoding.
+
+
+
+
+
+
+ Fix commit timestamp mechanism to not fail when queried about
+ the special XIDs FrozenTransactionId>
+ and BootstrapTransactionId> (Craig Ringer)
+
+
+
+
+
+
+ Check for serializability conflicts before reporting
+ constraint-violation failures (Thomas Munro)
+
+
+
+ When using serializable transaction isolation, it is desirable
+ that any error due to concurrent transactions should manifest
+ as a serialization failure, thereby cueing the application that
+ a retry might succeed. Unfortunately, this does not reliably
+ happen for duplicate-key failures caused by concurrent insertions.
+ This change ensures that such an error will be reported as a
+ serialization error if the application explicitly checked for
+ the presence of a conflicting key (and did not find it) earlier
+ in the transaction.
+
+
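A hedged sketch of the scenario in the entry above, assuming a table t with a
unique column k; both serializable sessions run this concurrently:

    BEGIN ISOLATION LEVEL SERIALIZABLE;
    SELECT 1 FROM t WHERE k = 42;     -- both sessions: no row found
    INSERT INTO t (k) VALUES (42);
    COMMIT;
    -- one session commits; the other now reports a serialization failure
    -- (SQLSTATE 40001, retryable) instead of a duplicate-key error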
+
+
+
+
+ Fix incorrect use of view reloptions as regular table reloptions (Tom
+ Lane)
+
+
+
+ The symptom was spurious ON CONFLICT is not supported on table
+ ... used as a catalog table> errors when the target
+ of INSERT ... ON CONFLICT> is a view with cascade option.
+
+
+
+
+
+
+ Fix incorrect target lists can have at most N>
+ entries complaint when using ON CONFLICT> with
+ wide tables (Tom Lane)
+
+
+
+
+
+
+ Fix spurious query provides a value for a dropped column>
+ errors during INSERT> or UPDATE> on a table
+ with a dropped column (Tom Lane)
+
+
+
+
+
+
+ Prevent multicolumn expansion of foo>.*> in
+ an UPDATE> source expression (Tom Lane)
+
+
+
+ This led to UPDATE target count mismatch --- internal
+ error>. Now the syntax is understood as a whole-row variable,
+ as it would be in other contexts.
+
+
+
+
+
+
+ Ensure that column typmods are determined accurately for
+ multi-row VALUES> constructs (Tom Lane)
+
+
+
+ This fixes problems occurring when the first value in a column has a
+ determinable typmod (e.g., length for a varchar> value) but
+ later values don't share the same limit.
+
+
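For example (a sketch of the new behavior; exact pre-fix symptoms varied):

    SELECT x FROM (VALUES ('abc'::varchar(3)), ('defghi')) AS v(x);
    -- the column is now resolved as plain varchar, not varchar(3),
    -- because the second row does not share the first row's limit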
+
+
+
+
+ Throw error for an unfinished Unicode surrogate pair at the end of a
+ Unicode string (Tom Lane)
+
+
+
+ Normally, a Unicode surrogate leading character must be followed by a
+ Unicode surrogate trailing character, but the check for this was
+ missed if the leading character was the last character in a Unicode
+ string literal (U&'...'>) or Unicode identifier
+ (U&"...">).
+
+
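For example, in a UTF8 database (a sketch of the new checking):

    SELECT U&'\+01F600';   -- complete six-digit escape: accepted
    SELECT U&'\D83D';      -- lone leading surrogate at end of string: now an error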
+
+
+
+
+ Fix execution of DISTINCT> and ordered aggregates when
+ multiple such aggregates are able to share the same transition state
+ (Heikki Linnakangas)
+
+
+
+
+
+
+ Fix implementation of phrase search operators in tsquery>
+ (Tom Lane)
+
+
+
+ Remove incorrect, and inconsistently-applied, rewrite rules that
+ tried to transform away AND/OR/NOT operators appearing below a PHRASE
+ operator; instead upgrade the execution engine to handle such cases
+ correctly. This fixes assorted strange behavior and possible crashes
+ for text search queries containing such combinations. Also fix
+ nested PHRASE operators to work sanely in combinations other than
+ simple left-deep trees, correct the behavior when removing stopwords
+ from a phrase search clause, and make sure that index searches behave
+ consistently with simple sequential-scan application of such queries.
+
+
+
+
+
+
+ Ensure that a purely negative text search query, such
+ as !foo>, matches empty tsvector>s (Tom Dunstan)
+
+
+
+ Such matches were found by GIN index searches, but not by sequential
+ scans or GiST index searches.
+
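For example, the following now yields true under sequential scans and GiST
searches, as it already did under GIN:

    SELECT ''::tsvector @@ '!foo'::tsquery;   -- true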
+
+
+
+
+
+ Prevent crash when ts_rewrite()> replaces a non-top-level
+ subtree with an empty query (Artur Zakirov)
+
+
+
+
+
+
+ Fix performance problems in ts_rewrite()> (Tom Lane)
+
+
+
+
+
+
+ Fix ts_rewrite()>'s handling of nested NOT operators
+ (Tom Lane)
+
+
+
+
+
+
+ Improve speed of user-defined aggregates that
+ use array_append()> as transition function (Tom Lane)
+
+
+
+
+
+
+ Fix array_fill()> to handle empty arrays properly (Tom Lane)
+
+
+
+
+
+
+ Fix possible crash in array_position()>
+ or array_positions()> when processing arrays of records
+ (Junseok Yang)
+
+
+
+
+
+
+ Fix one-byte buffer overrun in quote_literal_cstr()>
+ (Heikki Linnakangas)
+
+
+
+ The overrun occurred only if the input consisted entirely of single
+ quotes and/or backslashes.
+
+
+
+
+
+
+ Prevent multiple calls of pg_start_backup()>
+ and pg_stop_backup()> from running concurrently (Michael
+ Paquier)
+
+
+
+ This avoids an assertion failure, and possibly worse things, if
+ someone tries to run these functions in parallel.
+
+
+
+
+
+
+ Disable transform that attempted to remove no-op AT TIME
+ ZONE> conversions (Tom Lane)
+
+
+
+ This resulted in wrong answers when the simplified expression was
+ used in an index condition.
+
+
+
+
+
+
+ Avoid discarding interval>-to-interval> casts
+ that aren't really no-ops (Tom Lane)
+
+
+
+ In some cases, a cast that should result in zeroing out
+ low-order interval> fields was mistakenly deemed to be a
+ no-op and discarded. An example is that casting from INTERVAL
+ MONTH> to INTERVAL YEAR> failed to clear the months field.
+
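A worked example of the entry above (the inner cast manufactures an INTERVAL
MONTH value):

    SELECT CAST(CAST('14 months'::interval AS interval MONTH) AS interval YEAR);
    -- now returns '1 year'; previously the outer cast was discarded as a
    -- presumed no-op, leaving '1 year 2 mons'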
+
+
+
+
+
+ Fix crash if the number of workers available to a parallel query
+ decreases during a rescan (Andreas Seltenreich)
+
+
+
+
+
+
+ Fix bugs in transmitting GUC parameter values to parallel workers
+ (Michael Paquier, Tom Lane)
+
+
+
+
+
+
+ Allow statements prepared with PREPARE> to be given
+ parallel plans (Amit Kapila, Tobias Bussman)
+
+
+
+
+
+
+ Fix incorrect generation of parallel plans for semi-joins (Tom Lane)
+
+
+
+
+
+
+ Fix planner's cardinality estimates for parallel joins (Robert Haas)
+
+
+
+ Ensure that these estimates reflect the number of rows predicted to
+ be seen by each worker, rather than the total.
+
+
+
+
+
+
+ Fix planner to avoid trying to parallelize plan nodes containing
+ initplans or subplans (Tom Lane, Amit Kapila)
+
+
+
+
+
+
+ Ensure that cached plans are invalidated by changes in foreign-table
+ options (Amit Langote, Etsuro Fujita, Ashutosh Bapat)
+
+
+
+
+
+
+ Fix the plan generated for sorted partial aggregation with a constant
+ GROUP BY> clause (Tom Lane)
+
+
+
+
+
+
+ Fix could not find plan for CTE> planner error when dealing
+ with a UNION ALL> containing CTE references (Tom Lane)
+
+
+
+
+
+
+ Fix mishandling of initplans when forcibly adding a Material node to
+ a subplan (Tom Lane)
+
+
+
+ The typical consequence of this mistake was a plan should not
+ reference subplan's variable> error.
+
+
+
+
+
+
+ Fix foreign-key-based join selectivity estimation for semi-joins and
+ anti-joins, as well as inheritance cases (Tom Lane)
+
+
+
+ The new code for taking the existence of a foreign key relationship
+ into account did the wrong thing in these cases, making the estimates
+ worse not better than the pre-9.6 code.
+
+
+
+
+
+
+ Fix pg_dump> to emit the data of a sequence that is
+ marked as an extension configuration table (Michael Paquier)
+
+
+
+
+
+
+ Fix mishandling of ALTER DEFAULT PRIVILEGES ... REVOKE>
+ in pg_dump> (Stephen Frost)
+
+
+
+ pg_dump> missed issuing the
+ required REVOKE> commands in cases where ALTER
+ DEFAULT PRIVILEGES> had been used to reduce privileges to less than
+ they would normally be.
+
+
+
+
+
+
+ Fix pg_dump> to dump user-defined casts and transforms
+ that use built-in functions (Stephen Frost)
+
+
+
+
+
+
+ Fix pg_restore> with
--create --if-exists>
+ to behave more sanely if an archive contains
+ unrecognized DROP> commands (Tom Lane)
+
+
+
+ This doesn't fix any live bug, but it may improve the behavior in
+ future if pg_restore> is used with an archive
+ generated by a later pg_dump> version.
+
+
+
+
+
+
+ Fix pg_basebackup>'s rate limiting in the presence of
+ slow I/O (Antonin Houska)
+
+
+
+ If disk I/O was transiently much slower than the specified rate
+ limit, the calculation overflowed, effectively disabling the rate
+ limit for the rest of the run.
+
+
+
+
+
+
+ Fix pg_basebackup>'s handling of
+ symlinked pg_stat_tmp> and pg_replslot>
+ subdirectories (Magnus Hagander, Michael Paquier)
+
+
+
+
+
+
+ Fix possible pg_basebackup> failure on standby
+ server when including WAL files (Amit Kapila, Robert Haas)
+
+
+
+
+
+
+ Improve initdb> to insert the correct
+ platform-specific default values for
+ the xxx>_flush_after> parameters
+ into postgresql.conf> (Fabien Coelho, Tom Lane)
+
+
+
+ This is a cleaner way of documenting the default values than was used
+ previously.
+
+
+
+
+
+
+ Fix possible mishandling of expanded arrays in domain check
+ constraints and CASE> execution (Tom Lane)
+
+
+
+ It was possible for a PL/pgSQL function invoked in these contexts to
+ modify or even delete an array value that needs to be preserved for
+ additional operations.
+
+
+
+
+
+
+ Fix nested uses of PL/pgSQL functions in contexts such as domain
+ check constraints evaluated during assignment to a PL/pgSQL variable
+ (Tom Lane)
+
+
+
+
+
+
+ Ensure that the Python exception objects we create for PL/Python are
+ properly reference-counted (Rafa de la Torre, Tom Lane)
+
+
+
+ This avoids failures if the objects are used after a Python garbage
+ collection cycle has occurred.
+
+
+
+
+
+
+ Fix PL/Tcl to support triggers on tables that have .tupno>
+ as a column name (Tom Lane)
+
+
+
+ This matches the (previously undocumented) behavior of
+ PL/Tcl's spi_exec> and spi_execp> commands,
+ namely that a magic .tupno> column is inserted only if
+ there isn't a real column named that.
+
+
+
+
+
+
+ Allow DOS-style line endings in ~/.pgpass> files,
+ even on Unix (Vik Fearing)
+
+
+
+ This change simplifies use of the same password file across Unix and
+ Windows machines.
+
+
+
+
+
+
+ Fix one-byte buffer overrun if ecpg> is given a file
+ name that ends with a dot (Takayuki Tsunakawa)
+
+
+
+
+
+
+ Fix incorrect error reporting for duplicate data
+ in psql>'s \crosstabview> (Tom Lane)
+
+
+
+ psql> sometimes quoted the wrong row and/or column
+ values when complaining about multiple entries for the same crosstab
+ cell.
+
+
+
+
+
+
+ Fix psql>'s tab completion for ALTER DEFAULT
+ PRIVILEGES> (Gilles Darold, Stephen Frost)
+
+
+
+
+
+
+ Fix psql>'s tab completion for ALTER TABLE t
+ ALTER c DROP ...> (Kyotaro Horiguchi)
+
+
+
+
+
+
+ In psql>, treat an empty or all-blank setting of
+ the PAGER> environment variable as meaning no
+ pager> (Tom Lane)
+
+
+
+ Previously, such a setting caused output intended for the pager to
+ vanish entirely.
+
+
+
+
+
+
+ Improve contrib/dblink>'s reporting of
+ low-level libpq> errors, such as out-of-memory
+ (Joe Conway)
+
+
+
+
+
+
+ Teach contrib/dblink> to ignore irrelevant server options
+ when it uses a contrib/postgres_fdw> foreign server as
+ the source of connection options (Corey Huinker)
+
+
+
+ Previously, if the foreign server object had options that were not
+ also libpq> connection options, an error occurred.
+
+
+
+
+
+
+ Fix portability problems in contrib/pageinspect>'s
+ functions for GIN indexes (Peter Eisentraut, Tom Lane)
+
+
+
+
+
+
+ Fix possible miss of socket read events while waiting on Windows
+ (Amit Kapial)
+
+
+
+ This error was harmless for most uses, but it is known to cause hangs
+ when trying to use the pldebugger extension.
+
+
+
+
+
+
+ On Windows, ensure that environment variable changes are propagated
+ to DLLs built with debug options (Christian Ullrich)
+
+
+
+
+
+
+ Sync our copy of the timezone library with IANA release tzcode2016j
+ (Tom Lane)
+
+
+
+ This fixes various issues, most notably that timezone data
+ installation failed if the target directory didn't support hard
+ links.
+
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016j
+ for DST law changes in northern Cyprus (adding a new zone
+ Asia/Famagusta), Russia (adding a new zone Europe/Saratov), Tonga,
+ and Antarctica/Casey.
+ Historical corrections for Italy, Kazakhstan, Malta, and Palestine.
+ Switch to preferring numeric zone abbreviations for Tonga.
+
+
+
+
+
+
+
+
Release 9.6.1
--
cgit v1.2.3
From 549f74733f45804cd3180de853e5d0610eecc22f Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Thu, 2 Feb 2017 14:57:46 -0500
Subject: doc: Update CREATE DATABASE examples
The example of using CREATE DATABASE with the ENCODING option did not
work anymore (except in special circumstances) and did not represent a
good general-purpose example, so write some new examples.
Reported-by: marc+pgsql@milestonerdl.com
---
doc/src/sgml/ref/create_database.sgml | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
index cf0d53b301..48386a29f9 100644
--- a/doc/src/sgml/ref/create_database.sgml
+++ b/doc/src/sgml/ref/create_database.sgml
@@ -284,17 +284,33 @@ CREATE DATABASE sales OWNER salesapp TABLESPACE salesspace;
- To create a database music> which supports the ISO-8859-1
- character set:
+ To create a database music> with a different locale:
+
+CREATE DATABASE music
+ LC_COLLATE 'sv_SE.utf8' LC_CTYPE 'sv_SE.utf8'
+ TEMPLATE template0;
+
+ In this example, the TEMPLATE template0> clause is required if
+ the specified locale is different from the one in template1>.
+ (If it is not, then specifying the locale explicitly is redundant.)
+
+
+ To create a database music2> with a different locale and a
+ different character set encoding:
-CREATE DATABASE music ENCODING 'LATIN1' TEMPLATE template0;
+CREATE DATABASE music2
+ LC_COLLATE 'sv_SE.iso885915' LC_CTYPE 'sv_SE.iso885915'
+ ENCODING LATIN9
+ TEMPLATE template0;
+ The specified locale and encoding settings must match, or an error will be
+ reported.
+
- In this example, the TEMPLATE template0> clause would only
- be required if template1>'s encoding is not ISO-8859-1.
- Note that changing encoding might require selecting new
- LC_COLLATE> and LC_CTYPE> settings as well.
+
+ Note that locale names are specific to the operating system, so that the
+ above commands might not work in the same way everywhere.
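To see which locale and encoding template1 was initialized with, and hence
whether TEMPLATE template0 is needed, the pg_database catalog can be consulted:

    SELECT datname, pg_encoding_to_char(encoding) AS encoding,
           datcollate, datctype
    FROM pg_database
    WHERE datname IN ('template0', 'template1');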
--
cgit v1.2.3
From ad6af3fc4256c0e1eecf5d93d510da4b44e0d480 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 6 Feb 2017 14:19:23 -0500
Subject: Release note updates.
Add item for last-minute CREATE INDEX CONCURRENTLY fix.
Repair a couple of misspellings of patch authors' names.
Back-branch updates will follow shortly, but I thought I'd
commit this separately just to make it more visible.
---
doc/src/sgml/release-9.6.sgml | 39 ++++++++++++++++++++++++++++++++++++---
1 file changed, 36 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.6.sgml b/doc/src/sgml/release-9.6.sgml
index 97d1f51515..e6c4fa8f9a 100644
--- a/doc/src/sgml/release-9.6.sgml
+++ b/doc/src/sgml/release-9.6.sgml
@@ -23,7 +23,13 @@
- However, if you are upgrading from a version earlier than 9.6.1,
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted indexes.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.6.1,
see .
@@ -36,6 +42,33 @@
+
+ Fix a race condition that could cause indexes built
+ with CREATE INDEX CONCURRENTLY> to be corrupt
+ (Pavan Deolasee, Tom Lane)
+
+
+
+ If CREATE INDEX CONCURRENTLY> was used to build an index
+ that depends on a column not previously indexed, then rows inserted
+ or updated by transactions that ran concurrently with
+ the CREATE INDEX> command could have received incorrect
+ index entries. If you suspect this may have happened, the most
+ reliable solution is to rebuild affected indexes after installing
+ this update.
+
+
+
+
+
Allow statements prepared with PREPARE> to be given
- parallel plans (Amit Kapila, Tobias Bussman)
+ parallel plans (Amit Kapila, Tobias Bussmann)
@@ -1352,7 +1385,7 @@ Branch: REL9_6_STABLE [b98e5513f] 2016-12-21 11:11:36 -0500
-->
Fix possible miss of socket read events while waiting on Windows
- (Amit Kapial)
+ (Amit Kapila)
--
cgit v1.2.3
From ab82340a43bebe57a3db0e52bb74120b3bb53ae5 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 6 Feb 2017 15:17:27 -0500
Subject: Avoid permission failure in pg_sequences.last_value
Before, reading pg_sequences.last_value would fail unless the user had
appropriate sequence permissions, which would make the pg_sequences view
cumbersome to use. Instead, return null instead of the real value when
there are no permissions.
From: Michael Paquier
Reported-by: Shinoda, Noriyoshi
---
doc/src/sgml/catalogs.sgml | 5 ++++-
src/backend/catalog/system_views.sql | 6 +++++-
src/test/regress/expected/rules.out | 5 ++++-
3 files changed, 13 insertions(+), 3 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 204b8cfd55..787cc10bf8 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9882,7 +9882,10 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
The last sequence value written to disk. If caching is used,
this value can be greater than the last value handed out from the
- sequence. Null if the sequence has not been read from yet.
+ sequence. Null if the sequence has not been read from yet. Also, if
+ the current user does not have USAGE
+ or SELECT privilege on the sequence, the value is
+ null.
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 28be27a07e..907e0fb630 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -175,7 +175,11 @@ CREATE OR REPLACE VIEW pg_sequences AS
S.seqincrement AS increment_by,
S.seqcycle AS cycle,
S.seqcache AS cache_size,
- pg_sequence_last_value(C.oid) AS last_value
+ CASE
+ WHEN has_sequence_privilege(C.oid, 'SELECT,USAGE'::text)
+ THEN pg_sequence_last_value(C.oid)
+ ELSE NULL
+ END AS last_value
FROM pg_sequence S JOIN pg_class C ON (C.oid = S.seqrelid)
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
WHERE NOT pg_is_other_temp_schema(N.oid)
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 60731a99b7..9c99a451ba 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1647,7 +1647,10 @@ pg_sequences| SELECT n.nspname AS schemaname,
s.seqincrement AS increment_by,
s.seqcycle AS cycle,
s.seqcache AS cache_size,
- pg_sequence_last_value((c.oid)::regclass) AS last_value
+ CASE
+ WHEN has_sequence_privilege(c.oid, 'SELECT,USAGE'::text) THEN pg_sequence_last_value((c.oid)::regclass)
+ ELSE NULL::bigint
+ END AS last_value
FROM ((pg_sequence s
JOIN pg_class c ON ((c.oid = s.seqrelid)))
LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
--
cgit v1.2.3
From 696af9ab0a92642978169c227e187a65c2f35f17 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 6 Feb 2017 15:21:20 -0500
Subject: doc: Document sequence function privileges better
Document the privileges required for each of the sequence functions.
This was already in the GRANT reference page, but also add it to the
function description for easier reference.
---
doc/src/sgml/func.sgml | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
(limited to 'doc/src')
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index b214218791..9e2170ea4b 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -11723,6 +11723,10 @@ nextval('foo'::text) foo is looked up at
+
+ This function requires USAGE
+ or UPDATE privilege on the sequence.
+
@@ -11738,6 +11742,11 @@ nextval('foo'::text) foo is looked up at
other sessions have executed nextval since the
current session did.
+
+
+ This function requires USAGE
+ or SELECT privilege on the sequence.
+
@@ -11754,6 +11763,11 @@ nextval('foo'::text) foo is looked up at
lastval if nextval
has not yet been called in the current session.
+
+
+ This function requires USAGE
+ or SELECT privilege on the last used sequence.
+
@@ -11792,6 +11806,11 @@ SELECT setval('foo', 42, false); Next nextval> wi
back.
+
+
+ This function requires UPDATE privilege on the
+ sequence.
+
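The documented rules can be exercised with a hypothetical sequence and role:

    CREATE SEQUENCE foo_seq;
    CREATE ROLE reader;
    GRANT SELECT ON SEQUENCE foo_seq TO reader;
    SET ROLE reader;
    SELECT nextval('foo_seq');     -- fails: needs USAGE or UPDATE
    RESET ROLE;
    GRANT USAGE ON SEQUENCE foo_seq TO reader;
    SET ROLE reader;
    SELECT nextval('foo_seq');     -- allowed: USAGE suffices
    SELECT currval('foo_seq');     -- allowed: USAGE or SELECT suffices
    SELECT lastval();              -- allowed once nextval has been called
    SELECT setval('foo_seq', 42);  -- still fails: setval requires UPDATE
    RESET ROLE;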
--
cgit v1.2.3
From 64ee636a5b4dc5796a582bf061d94ca2809f5ef3 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Mon, 6 Feb 2017 15:30:16 -0500
Subject: Release notes for 9.6.2, 9.5.6, 9.4.11, 9.3.16, 9.2.20.
---
doc/src/sgml/release-9.2.sgml | 412 ++++++++++++++++++++++++++
doc/src/sgml/release-9.3.sgml | 448 ++++++++++++++++++++++++++++
doc/src/sgml/release-9.4.sgml | 533 +++++++++++++++++++++++++++++++++
doc/src/sgml/release-9.5.sgml | 664 +++++++++++++++++++++++++++++++++++++++++-
doc/src/sgml/release-9.6.sgml | 61 +---
5 files changed, 2058 insertions(+), 60 deletions(-)
(limited to 'doc/src')
diff --git a/doc/src/sgml/release-9.2.sgml b/doc/src/sgml/release-9.2.sgml
index 49430389d9..2e24b778a8 100644
--- a/doc/src/sgml/release-9.2.sgml
+++ b/doc/src/sgml/release-9.2.sgml
@@ -1,6 +1,418 @@
+
+ Release 9.2.20
+
+
+ Release Date
+ 2017-02-09
+
+
+
+ This release contains a variety of fixes from 9.2.19.
+ For information about new features in the 9.2 major release, see
+ .
+
+
+
+ Migration to Version 9.2.20
+
+
+ A dump/restore is not required for those running 9.2.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted indexes.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.2.11,
+ see .
+
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix a race condition that could cause indexes built
+ with CREATE INDEX CONCURRENTLY> to be corrupt
+ (Pavan Deolasee, Tom Lane)
+
+
+
+ If CREATE INDEX CONCURRENTLY> was used to build an index
+ that depends on a column not previously indexed, then rows inserted
+ or updated by transactions that ran concurrently with
+ the CREATE INDEX> command could have received incorrect
+ index entries. If you suspect this may have happened, the most
+ reliable solution is to rebuild affected indexes after installing
+ this update.
+
+
+
+
+
+ Unconditionally WAL-log creation of the init fork> for an
+ unlogged table (Michael Paquier)
+
+
+
+ Previously, this was skipped when
+ = minimal>, but actually it's necessary even in that case
+ to ensure that the unlogged table is properly reset to empty after a
+ crash.
+
+
+
+
+
+
+ Fix WAL page header validation when re-reading segments (Takayuki
+ Tsunakawa, Amit Kapila)
+
+
+
+ In corner cases, a spurious out-of-sequence TLI> error
+ could be reported during recovery.
+
+
+
+
+
+ If the stats collector dies during hot standby, restart it (Takayuki
+ Tsunakawa)
+
+
+
+
+
+ Check for interrupts while hot standby is waiting for a conflicting
+ query (Simon Riggs)
+
+
+
+
+
+ Avoid constantly respawning the autovacuum launcher in a corner case
+ (Amit Khandekar)
+
+
+
+ This fix avoids problems when autovacuum is nominally off and there
+ are some tables that require freezing, but all such tables are
+ already being processed by autovacuum workers.
+
+
+
+
+
+ Fix check for when an extension member object can be dropped (Tom Lane)
+
+
+
+ Extension upgrade scripts should be able to drop member objects,
+ but this was disallowed for serial-column sequences, and possibly
+ other cases.
+
+
+
+
+
+ Make sure ALTER TABLE> preserves index tablespace
+ assignments when rebuilding indexes (Tom Lane, Michael Paquier)
+
+
+
+ Previously, non-default settings
+ of could result in broken
+ indexes.
+
+
+
+
+
+ Prevent dropping a foreign-key constraint if there are pending
+ trigger events for the referenced relation (Tom Lane)
+
+
+
+ This avoids could not find trigger NNN>
+ or relation NNN> has no triggers errors.
+
+
+
+
+
+ Fix processing of OID column when a table with OIDs is associated to
+ a parent with OIDs via ALTER TABLE ... INHERIT> (Amit
+ Langote)
+
+
+
+ The OID column should be treated the same as regular user columns in
+ this case, but it wasn't, leading to odd behavior in later
+ inheritance changes.
+
+
+
+
+
+ Check for serializability conflicts before reporting
+ constraint-violation failures (Thomas Munro)
+
+
+
+ When using serializable transaction isolation, it is desirable
+ that any error due to concurrent transactions should manifest
+ as a serialization failure, thereby cueing the application that
+ a retry might succeed. Unfortunately, this does not reliably
+ happen for duplicate-key failures caused by concurrent insertions.
+ This change ensures that such an error will be reported as a
+ serialization error if the application explicitly checked for
+ the presence of a conflicting key (and did not find it) earlier
+ in the transaction.
+
+
+
+
+
+ Ensure that column typmods are determined accurately for
+ multi-row VALUES> constructs (Tom Lane)
+
+
+
+ This fixes problems occurring when the first value in a column has a
+ determinable typmod (e.g., length for a varchar> value) but
+ later values don't share the same limit.
+
+
+
+
+
+ Throw error for an unfinished Unicode surrogate pair at the end of a
+ Unicode string (Tom Lane)
+
+
+
+ Normally, a Unicode surrogate leading character must be followed by a
+ Unicode surrogate trailing character, but the check for this was
+ missed if the leading character was the last character in a Unicode
+ string literal (U&'...'>) or Unicode identifier
+ (U&"...">).
+
+
+
+
+
+ Ensure that a purely negative text search query, such
+ as !foo>, matches empty tsvector>s (Tom Dunstan)
+
+
+
+ Such matches were found by GIN index searches, but not by sequential
+ scans or GiST index searches.
+
+
+
+
+
+ Prevent crash when ts_rewrite()> replaces a non-top-level
+ subtree with an empty query (Artur Zakirov)
+
+
+
+
+
+ Fix performance problems in ts_rewrite()> (Tom Lane)
+
+
+
+
+
+ Fix ts_rewrite()>'s handling of nested NOT operators
+ (Tom Lane)
+
+
+
+
+
+ Fix array_fill()> to handle empty arrays properly (Tom Lane)
+
+
+
+
+
+ Fix one-byte buffer overrun in quote_literal_cstr()>
+ (Heikki Linnakangas)
+
+
+
+ The overrun occurred only if the input consisted entirely of single
+ quotes and/or backslashes.
+
+
+
+
+
+ Prevent multiple calls of pg_start_backup()>
+ and pg_stop_backup()> from running concurrently (Michael
+ Paquier)
+
+
+
+ This avoids an assertion failure, and possibly worse things, if
+ someone tries to run these functions in parallel.
+
+
+
+
+
+ Avoid discarding interval>-to-interval> casts
+ that aren't really no-ops (Tom Lane)
+
+
+
+ In some cases, a cast that should result in zeroing out
+ low-order interval> fields was mistakenly deemed to be a
+ no-op and discarded. An example is that casting from INTERVAL
+ MONTH> to INTERVAL YEAR> failed to clear the months field.
+
+
+
+
+
+ Fix pg_dump> to dump user-defined casts and transforms
+ that use built-in functions (Stephen Frost)
+
+
+
+
+
+ Fix possible pg_basebackup> failure on standby
+ server when including WAL files (Amit Kapila, Robert Haas)
+
+
+
+
+
+ Ensure that the Python exception objects we create for PL/Python are
+ properly reference-counted (Rafa de la Torre, Tom Lane)
+
+
+
+ This avoids failures if the objects are used after a Python garbage
+ collection cycle has occurred.
+
+
+
+
+
+ Fix PL/Tcl to support triggers on tables that have .tupno>
+ as a column name (Tom Lane)
+
+
+
+ This matches the (previously undocumented) behavior of
+ PL/Tcl's spi_exec> and spi_execp> commands,
+ namely that a magic .tupno> column is inserted only if
+ there isn't a real column named that.
+
+
+
+
+
+ Allow DOS-style line endings in ~/.pgpass> files,
+ even on Unix (Vik Fearing)
+
+
+
+ This change simplifies use of the same password file across Unix and
+ Windows machines.
+
+
+
+
+
+ Fix one-byte buffer overrun if ecpg> is given a file
+ name that ends with a dot (Takayuki Tsunakawa)
+
+
+
+
+
+ Fix psql>'s tab completion for ALTER DEFAULT
+ PRIVILEGES> (Gilles Darold, Stephen Frost)
+
+
+
+
+
+ In psql>, treat an empty or all-blank setting of
+ the PAGER> environment variable as meaning no
+ pager> (Tom Lane)
+
+
+
+ Previously, such a setting caused output intended for the pager to
+ vanish entirely.
+
+
+
+
+
+ Improve contrib/dblink>'s reporting of
+ low-level libpq> errors, such as out-of-memory
+ (Joe Conway)
+
+
+
+
+
+ On Windows, ensure that environment variable changes are propagated
+ to DLLs built with debug options (Christian Ullrich)
+
+
+
+
+
+ Sync our copy of the timezone library with IANA release tzcode2016j
+ (Tom Lane)
+
+
+
+ This fixes various issues, most notably that timezone data
+ installation failed if the target directory didn't support hard
+ links.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016j
+ for DST law changes in northern Cyprus (adding a new zone
+ Asia/Famagusta), Russia (adding a new zone Europe/Saratov), Tonga,
+ and Antarctica/Casey.
+ Historical corrections for Italy, Kazakhstan, Malta, and Palestine.
+ Switch to preferring numeric zone abbreviations for Tonga.
+
+
+
+
+
+
+
+
Release 9.2.19
diff --git a/doc/src/sgml/release-9.3.sgml b/doc/src/sgml/release-9.3.sgml
index 81205a40c7..31adad9d47 100644
--- a/doc/src/sgml/release-9.3.sgml
+++ b/doc/src/sgml/release-9.3.sgml
@@ -1,6 +1,454 @@
+
+ Release 9.3.16
+
+
+ Release Date
+ 2017-02-09
+
+
+
+ This release contains a variety of fixes from 9.3.15.
+ For information about new features in the 9.3 major release, see
+ .
+
+
+
+ Migration to Version 9.3.16
+
+
+ A dump/restore is not required for those running 9.3.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted indexes.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.3.15,
+ see .
+
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix a race condition that could cause indexes built
+ with CREATE INDEX CONCURRENTLY> to be corrupt
+ (Pavan Deolasee, Tom Lane)
+
+
+
+ If CREATE INDEX CONCURRENTLY> was used to build an index
+ that depends on a column not previously indexed, then rows inserted
+ or updated by transactions that ran concurrently with
+ the CREATE INDEX> command could have received incorrect
+ index entries. If you suspect this may have happened, the most
+ reliable solution is to rebuild affected indexes after installing
+ this update.
+
+
+
+
+
+ Unconditionally WAL-log creation of the init fork> for an
+ unlogged table (Michael Paquier)
+
+
+
+ Previously, this was skipped when
+ = minimal>, but actually it's necessary even in that case
+ to ensure that the unlogged table is properly reset to empty after a
+ crash.
+
+
+
+
+
+ If the stats collector dies during hot standby, restart it (Takayuki
+ Tsunakawa)
+
+
+
+
+
+ Ensure that hot standby feedback works correctly when it's enabled at
+ standby server start (Ants Aasma, Craig Ringer)
+
+
+
+
+
+ Check for interrupts while hot standby is waiting for a conflicting
+ query (Simon Riggs)
+
+
+
+
+
+ Avoid constantly respawning the autovacuum launcher in a corner case
+ (Amit Khandekar)
+
+
+
+ This fix avoids problems when autovacuum is nominally off and there
+ are some tables that require freezing, but all such tables are
+ already being processed by autovacuum workers.
+
+
+
+
+
+ Fix check for when an extension member object can be dropped (Tom Lane)
+
+
+
+ Extension upgrade scripts should be able to drop member objects,
+ but this was disallowed for serial-column sequences, and possibly
+ other cases.
+
+
+
+
+
+ Make sure ALTER TABLE> preserves index tablespace
+ assignments when rebuilding indexes (Tom Lane, Michael Paquier)
+
+
+
+ Previously, non-default settings
+ of could result in broken
+ indexes.
+
+
+
+
+
+ Prevent dropping a foreign-key constraint if there are pending
+ trigger events for the referenced relation (Tom Lane)
+
+
+
+ This avoids could not find trigger NNN>
+ or relation NNN> has no triggers errors.
+
+
+
+
+
+ Fix processing of OID column when a table with OIDs is associated to
+ a parent with OIDs via ALTER TABLE ... INHERIT> (Amit
+ Langote)
+
+
+
+ The OID column should be treated the same as regular user columns in
+ this case, but it wasn't, leading to odd behavior in later
+ inheritance changes.
+
+
+
+
+
+ Report correct object identity during ALTER TEXT SEARCH
+ CONFIGURATION> (Artur Zakirov)
+
+
+
+ The wrong catalog OID was reported to extensions such as logical
+ decoding.
+
+
+
+
+
+ Check for serializability conflicts before reporting
+ constraint-violation failures (Thomas Munro)
+
+
+
+ When using serializable transaction isolation, it is desirable
+ that any error due to concurrent transactions should manifest
+ as a serialization failure, thereby cueing the application that
+ a retry might succeed. Unfortunately, this does not reliably
+ happen for duplicate-key failures caused by concurrent insertions.
+ This change ensures that such an error will be reported as a
+ serialization error if the application explicitly checked for
+ the presence of a conflicting key (and did not find it) earlier
+ in the transaction.
+
+
+
+
+
+ Prevent multicolumn expansion of foo>.*> in
+ an UPDATE> source expression (Tom Lane)
+
+
+
+ This led to UPDATE target count mismatch --- internal
+ error>. Now the syntax is understood as a whole-row variable,
+ as it would be in other contexts.
+
+
+
+
+
+ Ensure that column typmods are determined accurately for
+ multi-row VALUES> constructs (Tom Lane)
+
+
+
+ This fixes problems occurring when the first value in a column has a
+ determinable typmod (e.g., length for a varchar> value) but
+ later values don't share the same limit.
+
+
+
+
+
+ Throw error for an unfinished Unicode surrogate pair at the end of a
+ Unicode string (Tom Lane)
+
+
+
+ Normally, a Unicode surrogate leading character must be followed by a
+ Unicode surrogate trailing character, but the check for this was
+ missed if the leading character was the last character in a Unicode
+ string literal (U&'...'>) or Unicode identifier
+ (U&"...">).
+
+
+
+
+
+ Ensure that a purely negative text search query, such
+ as !foo>, matches empty tsvector>s (Tom Dunstan)
+
+
+
+ Such matches were found by GIN index searches, but not by sequential
+ scans or GiST index searches.
+
+
+
+
+
+ Prevent crash when ts_rewrite()> replaces a non-top-level
+ subtree with an empty query (Artur Zakirov)
+
+
+
+
+
+ Fix performance problems in ts_rewrite()> (Tom Lane)
+
+
+
+
+
+ Fix ts_rewrite()>'s handling of nested NOT operators
+ (Tom Lane)
+
+
+
+
+
+ Fix array_fill()> to handle empty arrays properly (Tom Lane)
+
+
+
+
+
+ Fix one-byte buffer overrun in quote_literal_cstr()>
+ (Heikki Linnakangas)
+
+
+
+ The overrun occurred only if the input consisted entirely of single
+ quotes and/or backslashes.
+
+
+
+
+
+ Prevent multiple calls of pg_start_backup()>
+ and pg_stop_backup()> from running concurrently (Michael
+ Paquier)
+
+
+
+ This avoids an assertion failure, and possibly worse things, if
+ someone tries to run these functions in parallel.
+
+
+
+
+
+ Avoid discarding interval>-to-interval> casts
+ that aren't really no-ops (Tom Lane)
+
+
+
+ In some cases, a cast that should result in zeroing out
+ low-order interval> fields was mistakenly deemed to be a
+ no-op and discarded. An example is that casting from INTERVAL
+ MONTH> to INTERVAL YEAR> failed to clear the months field.
+
+
+
+
+
+ Ensure that cached plans are invalidated by changes in foreign-table
+ options (Amit Langote, Etsuro Fujita, Ashutosh Bapat)
+
+
+
+
+
+ Fix pg_dump> to dump user-defined casts and transforms
+ that use built-in functions (Stephen Frost)
+
+
+
+
+
+ Fix possible pg_basebackup> failure on standby
+ server when including WAL files (Amit Kapila, Robert Haas)
+
+
+
+
+
+ Ensure that the Python exception objects we create for PL/Python are
+ properly reference-counted (Rafa de la Torre, Tom Lane)
+
+
+
+ This avoids failures if the objects are used after a Python garbage
+ collection cycle has occurred.
+
+
+
+
+
+ Fix PL/Tcl to support triggers on tables that have .tupno>
+ as a column name (Tom Lane)
+
+
+
+ This matches the (previously undocumented) behavior of
+ PL/Tcl's spi_exec> and spi_execp> commands,
+ namely that a magic .tupno> column is inserted only if
+ there isn't a real column named that.
+
+
+
+
+
+ Allow DOS-style line endings in ~/.pgpass> files,
+ even on Unix (Vik Fearing)
+
+
+
+ This change simplifies use of the same password file across Unix and
+ Windows machines.
+
+
+
+
+
+ Fix one-byte buffer overrun if ecpg> is given a file
+ name that ends with a dot (Takayuki Tsunakawa)
+
+
+
+
+
+ Fix psql>'s tab completion for ALTER DEFAULT
+ PRIVILEGES> (Gilles Darold, Stephen Frost)
+
+
+
+
+
+ In psql>, treat an empty or all-blank setting of
+ the PAGER> environment variable as meaning no
+ pager> (Tom Lane)
+
+
+
+ Previously, such a setting caused output intended for the pager to
+ vanish entirely.
+
+
+
+
+
+ Improve contrib/dblink>'s reporting of
+ low-level libpq> errors, such as out-of-memory
+ (Joe Conway)
+
+
+
+
+
+ Teach contrib/dblink> to ignore irrelevant server options
+ when it uses a contrib/postgres_fdw> foreign server as
+ the source of connection options (Corey Huinker)
+
+
+
+ Previously, if the foreign server object had options that were not
+ also libpq> connection options, an error occurred.
+
+
+
+
+
+ On Windows, ensure that environment variable changes are propagated
+ to DLLs built with debug options (Christian Ullrich)
+
+
+
+
+
+ Sync our copy of the timezone library with IANA release tzcode2016j
+ (Tom Lane)
+
+
+
+ This fixes various issues, most notably that timezone data
+ installation failed if the target directory didn't support hard
+ links.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016j
+ for DST law changes in northern Cyprus (adding a new zone
+ Asia/Famagusta), Russia (adding a new zone Europe/Saratov), Tonga,
+ and Antarctica/Casey.
+ Historical corrections for Italy, Kazakhstan, Malta, and Palestine.
+ Switch to preferring numeric zone abbreviations for Tonga.
+
+
+
+
+
+
+
+
Release 9.3.15
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml
index 94b028a065..b4e64214ad 100644
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -1,6 +1,539 @@
+
+ Release 9.4.11
+
+
+ Release Date
+ 2017-02-09
+
+
+
+ This release contains a variety of fixes from 9.4.10.
+ For information about new features in the 9.4 major release, see
+ .
+
+
+
+ Migration to Version 9.4.11
+
+
+ A dump/restore is not required for those running 9.4.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted indexes.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.4.10,
+ see .
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix a race condition that could cause indexes built
+ with CREATE INDEX CONCURRENTLY> to be corrupt
+ (Pavan Deolasee, Tom Lane)
+
+
+
+ If CREATE INDEX CONCURRENTLY> was used to build an index
+ that depends on a column not previously indexed, then rows inserted
+ or updated by transactions that ran concurrently with
+ the CREATE INDEX> command could have received incorrect
+ index entries. If you suspect this may have happened, the most
+ reliable solution is to rebuild affected indexes after installing
+ this update.
+
+
+
+
+
+ Ensure that the special snapshot used for catalog scans is not
+ invalidated by premature data pruning (Tom Lane)
+
+
+
+ Backends failed to account for this snapshot when advertising their
+ oldest xmin, potentially allowing concurrent vacuuming operations to
+ remove data that was still needed. This led to transient failures
+ along the lines of cache lookup failed for relation 1255>.
+
+
+
+
+
+ Unconditionally WAL-log creation of the init fork> for an
+ unlogged table (Michael Paquier)
+
+
+
+ Previously, this was skipped when
+ = minimal>, but actually it's necessary even in that case
+ to ensure that the unlogged table is properly reset to empty after a
+ crash.
+
+
+
+
+
+ Reduce interlocking on standby servers during the replay of btree
+ index vacuuming operations (Simon Riggs)
+
+
+
+ This change avoids substantial replication delays that sometimes
+ occurred while replaying such operations.
+
+
+
+
+
+ If the stats collector dies during hot standby, restart it (Takayuki
+ Tsunakawa)
+
+
+
+
+
+ Ensure that hot standby feedback works correctly when it's enabled at
+ standby server start (Ants Aasma, Craig Ringer)
+
+
+
+
+
+ Check for interrupts while hot standby is waiting for a conflicting
+ query (Simon Riggs)
+
+
+
+
+
+ Avoid constantly respawning the autovacuum launcher in a corner case
+ (Amit Khandekar)
+
+
+
+ This fix avoids problems when autovacuum is nominally off and there
+ are some tables that require freezing, but all such tables are
+ already being processed by autovacuum workers.
+
+
+
+
+
+ Fix check for when an extension member object can be dropped (Tom Lane)
+
+
+
+ Extension upgrade scripts should be able to drop member objects,
+ but this was disallowed for serial-column sequences, and possibly
+ other cases.
+
+
+
+
+
+ Make sure ALTER TABLE> preserves index tablespace
+ assignments when rebuilding indexes (Tom Lane, Michael Paquier)
+
+
+
+ Previously, non-default settings
+ of default_tablespace> could result in broken
+ indexes.
+
+
+
+
+
+ Fix incorrect updating of trigger function properties when changing a
+ foreign-key constraint's deferrability properties with ALTER
+ TABLE ... ALTER CONSTRAINT> (Tom Lane)
+
+
+
+ This led to odd failures during subsequent exercise of the foreign
+ key, as the triggers were fired at the wrong times.
+
+
+
+
+
+ Prevent dropping a foreign-key constraint if there are pending
+ trigger events for the referenced relation (Tom Lane)
+
+
+
+ This avoids could not find trigger NNN>
+ or relation NNN> has no triggers errors.
+
+
+
+
+
+ Fix processing of OID column when a table with OIDs is associated with
+ a parent with OIDs via ALTER TABLE ... INHERIT> (Amit
+ Langote)
+
+
+
+ The OID column should be treated the same as regular user columns in
+ this case, but it wasn't, leading to odd behavior in later
+ inheritance changes.
+
+
+
+
+
+ Fix CREATE OR REPLACE VIEW> to update the view query
+ before attempting to apply the new view options (Dean Rasheed)
+
+
+
+ Previously the command would fail if the new options were
+ inconsistent with the old view definition.
+
+
+
+
+
+ Report correct object identity during ALTER TEXT SEARCH
+ CONFIGURATION> (Artur Zakirov)
+
+
+
+ The wrong catalog OID was reported to extensions such as logical
+ decoding.
+
+
+
+
+
+ Check for serializability conflicts before reporting
+ constraint-violation failures (Thomas Munro)
+
+
+
+ When using serializable transaction isolation, it is desirable
+ that any error due to concurrent transactions should manifest
+ as a serialization failure, thereby cueing the application that
+ a retry might succeed. Unfortunately, this does not reliably
+ happen for duplicate-key failures caused by concurrent insertions.
+ This change ensures that such an error will be reported as a
+ serialization error if the application explicitly checked for
+ the presence of a conflicting key (and did not find it) earlier
+ in the transaction.
+
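+
+ A minimal sketch of the pattern this change covers (the schema is
+ hypothetical):
+
+BEGIN ISOLATION LEVEL SERIALIZABLE;
+SELECT 1 FROM accounts WHERE id = 42;   -- finds no row
+-- meanwhile, a concurrent transaction inserts and commits id 42
+INSERT INTO accounts (id) VALUES (42);  -- now reported as a serialization
+                                        -- failure, not a unique violation
+COMMIT;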
+
+
+
+
+ Prevent multicolumn expansion of foo>.*> in
+ an UPDATE> source expression (Tom Lane)
+
+
+
+ This led to UPDATE target count mismatch --- internal
+ error>. Now the syntax is understood as a whole-row variable,
+ as it would be in other contexts.
+
+
+
+
+
+ Ensure that column typmods are determined accurately for
+ multi-row VALUES> constructs (Tom Lane)
+
+
+
+ This fixes problems occurring when the first value in a column has a
+ determinable typmod (e.g., length for a varchar> value) but
+ later values don't share the same limit.
+
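+
+ A sketch of the kind of construct affected (the types are illustrative):
+
+-- the column should be plain varchar, without a length limit, because
+-- the second row does not share the first row's varchar(3) typmod
+SELECT x FROM (VALUES ('abc'::varchar(3)), ('defghi')) AS t(x);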
+
+
+
+
+ Throw error for an unfinished Unicode surrogate pair at the end of a
+ Unicode string (Tom Lane)
+
+
+
+ Normally, a Unicode surrogate leading character must be followed by a
+ Unicode surrogate trailing character, but the check for this was
+ missed if the leading character was the last character in a Unicode
+ string literal (U&'...'>) or Unicode identifier
+ (U&"...">).
+
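+
+ For example (illustrative queries, assuming a UTF8 server encoding):
+
+SELECT U&'\D83D\DE00';  -- complete surrogate pair, still accepted
+SELECT U&'\D83D';       -- unfinished pair at end of string, now an error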
+
+
+
+
+ Ensure that a purely negative text search query, such
+ as !foo>, matches empty tsvector>s (Tom Dunstan)
+
+
+
+ Such matches were found by GIN index searches, but not by sequential
+ scans or GiST index searches.
+
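+
+ For instance (an illustrative query), this now yields true regardless
+ of the scan type used:
+
+SELECT ''::tsvector @@ '!foo'::tsquery;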
+
+
+
+
+ Prevent crash when ts_rewrite()> replaces a non-top-level
+ subtree with an empty query (Artur Zakirov)
+
+
+
+
+
+ Fix performance problems in ts_rewrite()> (Tom Lane)
+
+
+
+
+
+ Fix ts_rewrite()>'s handling of nested NOT operators
+ (Tom Lane)
+
+
+
+
+
+ Fix array_fill()> to handle empty arrays properly (Tom Lane)
+
+
+
+
+
+ Fix one-byte buffer overrun in quote_literal_cstr()>
+ (Heikki Linnakangas)
+
+
+
+ The overrun occurred only if the input consisted entirely of single
+ quotes and/or backslashes.
+
+
+
+
+
+ Prevent multiple calls of pg_start_backup()>
+ and pg_stop_backup()> from running concurrently (Michael
+ Paquier)
+
+
+
+ This avoids an assertion failure, and possibly worse things, if
+ someone tries to run these functions in parallel.
+
+
+
+
+
+ Avoid discarding interval>-to-interval> casts
+ that aren't really no-ops (Tom Lane)
+
+
+
+ In some cases, a cast that should result in zeroing out
+ low-order interval> fields was mistakenly deemed to be a
+ no-op and discarded. An example is that casting from INTERVAL
+ MONTH> to INTERVAL YEAR> failed to clear the months field.
+
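+
+ A sketch of the reported case:
+
+SELECT CAST(CAST('17 months' AS INTERVAL MONTH) AS INTERVAL YEAR);
+-- the months field is now correctly cleared, leaving 1 year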
+
+
+
+
+ Ensure that cached plans are invalidated by changes in foreign-table
+ options (Amit Langote, Etsuro Fujita, Ashutosh Bapat)
+
+
+
+
+
+ Fix pg_dump> to dump user-defined casts and transforms
+ that use built-in functions (Stephen Frost)
+
+
+
+
+
+ Fix pg_restore> with --create> --if-exists>
+ to behave more sanely if an archive contains
+ unrecognized DROP> commands (Tom Lane)
+
+
+
+ This doesn't fix any live bug, but it may improve the behavior in
+ future if pg_restore> is used with an archive
+ generated by a later pg_dump> version.
+
+
+
+
+
+ Fix pg_basebackup>'s rate limiting in the presence of
+ slow I/O (Antonin Houska)
+
+
+
+ If disk I/O was transiently much slower than the specified rate
+ limit, the calculation overflowed, effectively disabling the rate
+ limit for the rest of the run.
+
+
+
+
+
+ Fix pg_basebackup>'s handling of
+ symlinked pg_stat_tmp> and pg_replslot>
+ subdirectories (Magnus Hagander, Michael Paquier)
+
+
+
+
+
+ Fix possible pg_basebackup> failure on standby
+ server when including WAL files (Amit Kapila, Robert Haas)
+
+
+
+
+
+ Ensure that the Python exception objects we create for PL/Python are
+ properly reference-counted (Rafa de la Torre, Tom Lane)
+
+
+
+ This avoids failures if the objects are used after a Python garbage
+ collection cycle has occurred.
+
+
+
+
+
+ Fix PL/Tcl to support triggers on tables that have .tupno>
+ as a column name (Tom Lane)
+
+
+
+ This matches the (previously undocumented) behavior of
+ PL/Tcl's spi_exec> and spi_execp> commands,
+ namely that a magic .tupno> column is inserted only if
+ there isn't a real column named that.
+
+
+
+
+
+ Allow DOS-style line endings in ~/.pgpass> files,
+ even on Unix (Vik Fearing)
+
+
+
+ This change simplifies use of the same password file across Unix and
+ Windows machines.
+
+
+
+
+
+ Fix one-byte buffer overrun if ecpg> is given a file
+ name that ends with a dot (Takayuki Tsunakawa)
+
+
+
+
+
+ Fix psql>'s tab completion for ALTER DEFAULT
+ PRIVILEGES> (Gilles Darold, Stephen Frost)
+
+
+
+
+
+ In psql>, treat an empty or all-blank setting of
+ the PAGER> environment variable as meaning no
+ pager> (Tom Lane)
+
+
+
+ Previously, such a setting caused output intended for the pager to
+ vanish entirely.
+
+
+
+
+
+ Improve contrib/dblink>'s reporting of
+ low-level libpq> errors, such as out-of-memory
+ (Joe Conway)
+
+
+
+
+
+ Teach contrib/dblink> to ignore irrelevant server options
+ when it uses a contrib/postgres_fdw> foreign server as
+ the source of connection options (Corey Huinker)
+
+
+
+ Previously, if the foreign server object had options that were not
+ also libpq> connection options, an error occurred.
+
+
+
+
+
+ On Windows, ensure that environment variable changes are propagated
+ to DLLs built with debug options (Christian Ullrich)
+
+
+
+
+
+ Sync our copy of the timezone library with IANA release tzcode2016j
+ (Tom Lane)
+
+
+
+ This fixes various issues, most notably that timezone data
+ installation failed if the target directory didn't support hard
+ links.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016j
+ for DST law changes in northern Cyprus (adding a new zone
+ Asia/Famagusta), Russia (adding a new zone Europe/Saratov), Tonga,
+ and Antarctica/Casey.
+ Historical corrections for Italy, Kazakhstan, Malta, and Palestine.
+ Switch to preferring numeric zone abbreviations for Tonga.
+
+
+
+
+
+
+
+
Release 9.4.10
diff --git a/doc/src/sgml/release-9.5.sgml b/doc/src/sgml/release-9.5.sgml
index abc0337026..2438b95b8c 100644
--- a/doc/src/sgml/release-9.5.sgml
+++ b/doc/src/sgml/release-9.5.sgml
@@ -1,6 +1,668 @@
+
+ Release 9.5.6
+
+
+ Release Date
+ 2017-02-09
+
+
+
+ This release contains a variety of fixes from 9.5.5.
+ For information about new features in the 9.5 major release, see
+ the 9.5 release notes.
+
+
+
+ Migration to Version 9.5.6
+
+
+ A dump/restore is not required for those running 9.5.X.
+
+
+
+ However, if your installation has been affected by the bug described in
+ the first changelog entry below, then after updating you may need
+ to take action to repair corrupted indexes.
+
+
+
+ Also, if you are upgrading from a version earlier than 9.5.5,
+ see the release notes for 9.5.5.
+
+
+
+
+ Changes
+
+
+
+
+
+ Fix a race condition that could cause indexes built
+ with CREATE INDEX CONCURRENTLY> to be corrupt
+ (Pavan Deolasee, Tom Lane)
+
+
+
+ If CREATE INDEX CONCURRENTLY> was used to build an index
+ that depends on a column not previously indexed, then rows inserted
+ or updated by transactions that ran concurrently with
+ the CREATE INDEX> command could have received incorrect
+ index entries. If you suspect this may have happened, the most
+ reliable solution is to rebuild affected indexes after installing
+ this update.
+
+
+
+
+
+ Ensure that the special snapshot used for catalog scans is not
+ invalidated by premature data pruning (Tom Lane)
+
+
+
+ Backends failed to account for this snapshot when advertising their
+ oldest xmin, potentially allowing concurrent vacuuming operations to
+ remove data that was still needed. This led to transient failures
+ along the lines of cache lookup failed for relation 1255>.
+
+
+
+
+
+ Fix incorrect WAL logging for BRIN indexes (Kuntal Ghosh)
+
+
+
+ The WAL record emitted for a BRIN revmap> page when moving an
+ index tuple to a different page was incorrect. Replay would make the
+ related portion of the index useless, forcing it to be recomputed.
+
+
+
+
+
+ Unconditionally WAL-log creation of the init fork> for an
+ unlogged table (Michael Paquier)
+
+
+
+ Previously, this was skipped when
+ wal_level> = minimal>, but actually it's necessary even in that case
+ to ensure that the unlogged table is properly reset to empty after a
+ crash.
+
+
+
+
+
+
+ Reduce interlocking on standby servers during the replay of btree
+ index vacuuming operations (Simon Riggs)
+
+
+
+ This change avoids substantial replication delays that sometimes
+ occurred while replaying such operations.
+
+
+
+
+
+ If the stats collector dies during hot standby, restart it (Takayuki
+ Tsunakawa)
+
+
+
+
+
+ Ensure that hot standby feedback works correctly when it's enabled at
+ standby server start (Ants Aasma, Craig Ringer)
+
+
+
+
+
+ Check for interrupts while hot standby is waiting for a conflicting
+ query (Simon Riggs)
+
+
+
+
+
+ Avoid constantly respawning the autovacuum launcher in a corner case
+ (Amit Khandekar)
+
+
+
+ This fix avoids problems when autovacuum is nominally off and there
+ are some tables that require freezing, but all such tables are
+ already being processed by autovacuum workers.
+
+
+
+
+
+ Fix check for when an extension member object can be dropped (Tom Lane)
+
+
+
+ Extension upgrade scripts should be able to drop member objects,
+ but this was disallowed for serial-column sequences, and possibly
+ other cases.
+
+
+
+
+
+ Make sure ALTER TABLE> preserves index tablespace
+ assignments when rebuilding indexes (Tom Lane, Michael Paquier)
+
+
+
+ Previously, non-default settings
+ of default_tablespace> could result in broken
+ indexes.
+
+
+
+
+
+ Fix incorrect updating of trigger function properties when changing a
+ foreign-key constraint's deferrability properties with ALTER
+ TABLE ... ALTER CONSTRAINT> (Tom Lane)
+
+
+
+ This led to odd failures during subsequent exercise of the foreign
+ key, as the triggers were fired at the wrong times.
+
+
+
+
+
+ Prevent dropping a foreign-key constraint if there are pending
+ trigger events for the referenced relation (Tom Lane)
+
+
+
+ This avoids could not find trigger NNN>
+ or relation NNN> has no triggers errors.
+
+
+
+
+
+ Fix ALTER TABLE ... SET DATA TYPE ... USING> when child
+ table has different column ordering than the parent
+ (Álvaro Herrera)
+
+
+
+ Failure to adjust the column numbering in the USING>
+ expression led to errors,
+ typically attribute N> has wrong type.
+
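+
+ A sketch of the affected pattern (hypothetical tables, where the child
+ acquired a different column order before being attached):
+
+CREATE TABLE parent (a int, b text);
+CREATE TABLE child (b text, a int);
+ALTER TABLE child INHERIT parent;
+ALTER TABLE parent ALTER COLUMN a SET DATA TYPE bigint USING a + 1;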
+
+
+
+
+ Fix processing of OID column when a table with OIDs is associated with
+ a parent with OIDs via ALTER TABLE ... INHERIT> (Amit
+ Langote)
+
+
+
+ The OID column should be treated the same as regular user columns in
+ this case, but it wasn't, leading to odd behavior in later
+ inheritance changes.
+
+
+
+
+
+ Fix CREATE OR REPLACE VIEW> to update the view query
+ before attempting to apply the new view options (Dean Rasheed)
+
+
+
+ Previously the command would fail if the new options were
+ inconsistent with the old view definition.
+
+
+
+
+
+ Report correct object identity during ALTER TEXT SEARCH
+ CONFIGURATION> (Artur Zakirov)
+
+
+
+ The wrong catalog OID was reported to extensions such as logical
+ decoding.
+
+
+
+
+
+ Fix commit timestamp mechanism to not fail when queried about
+ the special XIDs FrozenTransactionId>
+ and BootstrapTransactionId> (Craig Ringer)
+
+
+
+
+
+
+ Check for serializability conflicts before reporting
+ constraint-violation failures (Thomas Munro)
+
+
+
+ When using serializable transaction isolation, it is desirable
+ that any error due to concurrent transactions should manifest
+ as a serialization failure, thereby cueing the application that
+ a retry might succeed. Unfortunately, this does not reliably
+ happen for duplicate-key failures caused by concurrent insertions.
+ This change ensures that such an error will be reported as a
+ serialization error if the application explicitly checked for
+ the presence of a conflicting key (and did not find it) earlier
+ in the transaction.
+
+
+
+
+
+ Fix incorrect use of view reloptions as regular table reloptions (Tom
+ Lane)
+
+
+
+ The symptom was spurious ON CONFLICT is not supported on table
+ ... used as a catalog table> errors when the target
+ of INSERT ... ON CONFLICT> is a view with cascade option.
+
+
+
+
+
+ Fix incorrect target lists can have at most N>
+ entries complaint when using ON CONFLICT> with
+ wide tables (Tom Lane)
+
+
+
+
+
+ Prevent multicolumn expansion of foo>.*> in
+ an UPDATE> source expression (Tom Lane)
+
+
+
+ This led to UPDATE target count mismatch --- internal
+ error>. Now the syntax is understood as a whole-row variable,
+ as it would be in other contexts.
+
+
+
+
+
+ Ensure that column typmods are determined accurately for
+ multi-row VALUES> constructs (Tom Lane)
+
+
+
+ This fixes problems occurring when the first value in a column has a
+ determinable typmod (e.g., length for a varchar> value) but
+ later values don't share the same limit.
+
+
+
+
+
+ Throw error for an unfinished Unicode surrogate pair at the end of a
+ Unicode string (Tom Lane)
+
+
+
+ Normally, a Unicode surrogate leading character must be followed by a
+ Unicode surrogate trailing character, but the check for this was
+ missed if the leading character was the last character in a Unicode
+ string literal (U&'...'>) or Unicode identifier
+ (U&"...">).
+
+
+
+
+
+ Ensure that a purely negative text search query, such
+ as !foo>, matches empty tsvector>s (Tom Dunstan)
+
+
+
+ Such matches were found by GIN index searches, but not by sequential
+ scans or GiST index searches.
+
+
+
+
+
+ Prevent crash when ts_rewrite()> replaces a non-top-level
+ subtree with an empty query (Artur Zakirov)
+
+
+
+
+
+ Fix performance problems in ts_rewrite()> (Tom Lane)
+
+
+
+
+
+ Fix ts_rewrite()>'s handling of nested NOT operators
+ (Tom Lane)
+
+
+
+
+
+ Improve speed of user-defined aggregates that
+ use array_append()> as transition function (Tom Lane)
+
+
+
+
+
+ Fix array_fill()> to handle empty arrays properly (Tom Lane)
+
+
+
+
+
+ Fix possible crash in array_position()>
+ or array_positions()> when processing arrays of records
+ (Junseok Yang)
+
+
+
+
+
+ Fix one-byte buffer overrun in quote_literal_cstr()>
+ (Heikki Linnakangas)
+
+
+
+ The overrun occurred only if the input consisted entirely of single
+ quotes and/or backslashes.
+
+
+
+
+
+ Prevent multiple calls of pg_start_backup()>
+ and pg_stop_backup()> from running concurrently (Michael
+ Paquier)
+
+
+
+ This avoids an assertion failure, and possibly worse things, if
+ someone tries to run these functions in parallel.
+
+
+
+
+
+ Disable transform that attempted to remove no-op AT TIME
+ ZONE> conversions (Tom Lane)
+
+
+
+ This resulted in wrong answers when the simplified expression was
+ used in an index condition.
+
+
+
+
+
+ Avoid discarding interval>-to-interval> casts
+ that aren't really no-ops (Tom Lane)
+
+
+
+ In some cases, a cast that should result in zeroing out
+ low-order interval> fields was mistakenly deemed to be a
+ no-op and discarded. An example is that casting from INTERVAL
+ MONTH> to INTERVAL YEAR> failed to clear the months field.
+
+
+
+
+
+ Fix bugs in transmitting GUC parameter values to parallel workers
+ (Michael Paquier, Tom Lane)
+
+
+
+
+
+ Ensure that cached plans are invalidated by changes in foreign-table
+ options (Amit Langote, Etsuro Fujita, Ashutosh Bapat)
+
+
+
+
+
+ Fix pg_dump> to dump user-defined casts and transforms
+ that use built-in functions (Stephen Frost)
+
+
+
+
+
+ Fix pg_restore> with --create> --if-exists>
+ to behave more sanely if an archive contains
+ unrecognized DROP> commands (Tom Lane)
+
+
+
+ This doesn't fix any live bug, but it may improve the behavior in
+ future if pg_restore> is used with an archive
+ generated by a later pg_dump> version.
+
+
+
+
+
+ Fix pg_basebackup>'s rate limiting in the presence of
+ slow I/O (Antonin Houska)
+
+
+
+ If disk I/O was transiently much slower than the specified rate
+ limit, the calculation overflowed, effectively disabling the rate
+ limit for the rest of the run.
+
+
+
+
+
+ Fix pg_basebackup>'s handling of
+ symlinked pg_stat_tmp> and pg_replslot>
+ subdirectories (Magnus Hagander, Michael Paquier)
+
+
+
+
+
+ Fix possible pg_basebackup> failure on standby
+ server when including WAL files (Amit Kapila, Robert Haas)
+
+
+
+
+
+ Fix possible mishandling of expanded arrays in domain check
+ constraints and CASE> execution (Tom Lane)
+
+
+
+ It was possible for a PL/pgSQL function invoked in these contexts to
+ modify or even delete an array value that needs to be preserved for
+ additional operations.
+
+
+
+
+
+ Fix nested uses of PL/pgSQL functions in contexts such as domain
+ check constraints evaluated during assignment to a PL/pgSQL variable
+ (Tom Lane)
+
+
+
+
+
+ Ensure that the Python exception objects we create for PL/Python are
+ properly reference-counted (Rafa de la Torre, Tom Lane)
+
+
+
+ This avoids failures if the objects are used after a Python garbage
+ collection cycle has occurred.
+
+
+
+
+
+ Fix PL/Tcl to support triggers on tables that have .tupno>
+ as a column name (Tom Lane)
+
+
+
+ This matches the (previously undocumented) behavior of
+ PL/Tcl's spi_exec> and spi_execp> commands,
+ namely that a magic .tupno> column is inserted only if
+ there isn't a real column named that.
+
+
+
+
+
+ Allow DOS-style line endings in ~/.pgpass> files,
+ even on Unix (Vik Fearing)
+
+
+
+ This change simplifies use of the same password file across Unix and
+ Windows machines.
+
+
+
+
+
+ Fix one-byte buffer overrun if ecpg> is given a file
+ name that ends with a dot (Takayuki Tsunakawa)
+
+
+
+
+
+ Fix psql>'s tab completion for ALTER DEFAULT
+ PRIVILEGES> (Gilles Darold, Stephen Frost)
+
+
+
+
+
+ In psql>, treat an empty or all-blank setting of
+ the PAGER> environment variable as meaning no
+ pager> (Tom Lane)
+
+
+
+ Previously, such a setting caused output intended for the pager to
+ vanish entirely.
+
+
+
+
+
+ Improve contrib/dblink>'s reporting of
+ low-level libpq> errors, such as out-of-memory
+ (Joe Conway)
+
+
+
+
+
+ Teach contrib/dblink> to ignore irrelevant server options
+ when it uses a contrib/postgres_fdw> foreign server as
+ the source of connection options (Corey Huinker)
+
+
+
+ Previously, if the foreign server object had options that were not
+ also libpq> connection options, an error occurred.
+
+
+
+
+
+ Fix portability problems in contrib/pageinspect>'s
+ functions for GIN indexes (Peter Eisentraut, Tom Lane)
+
+
+
+
+
+ On Windows, ensure that environment variable changes are propagated
+ to DLLs built with debug options (Christian Ullrich)
+
+
+
+
+
+ Sync our copy of the timezone library with IANA release tzcode2016j
+ (Tom Lane)
+
+
+
+ This fixes various issues, most notably that timezone data
+ installation failed if the target directory didn't support hard
+ links.
+
+
+
+
+
+ Update time zone data files to tzdata> release 2016j
+ for DST law changes in northern Cyprus (adding a new zone
+ Asia/Famagusta), Russia (adding a new zone Europe/Saratov), Tonga,
+ and Antarctica/Casey.
+ Historical corrections for Italy, Kazakhstan, Malta, and Palestine.
+ Switch to preferring numeric zone abbreviations for Tonga.
+
+
+
+
+
+
+
+
Release 9.5.5
@@ -64,7 +726,7 @@
-
- Fix WAL page header validation when re-reading segments (Takayuki
- Tsunakawa, Amit Kapila)
-
-
-
- In corner cases, a spurious out-of-sequence TLI> error
- could be reported during recovery.
-
-
-
-
-
-
- Reduce interlocking on standby servers during the replay of btree
- index vacuuming operations (Simon Riggs)
-
-
-
- This change avoids substantial replication delays that sometimes
- occurred while replaying such operations.
-
-
-
-
-
-
- Check for serializability conflicts before reporting
- constraint-violation failures (Thomas Munro)
-
-
-
- When using serializable transaction isolation, it is desirable
- that any error due to concurrent transactions should manifest
- as a serialization failure, thereby cueing the application that
- a retry might succeed. Unfortunately, this does not reliably
- happen for duplicate-key failures caused by concurrent insertions.
- This change ensures that such an error will be reported as a
- serialization error if the application explicitly checked for
- the presence of a conflicting key (and did not find it) earlier
- in the transaction.
-
-
-
-
-
+
+
+
+ pg_receivewal
+
+
+
+ pg_receivewal
+ 1
+ Application
+
+
+
+ pg_receivewal
+ stream transaction logs from a PostgreSQL server
+
+
+
+
+ pg_receivewal [ option>... ]
+
+
+
+
+
+ Description
+
+
+ pg_receivewal is used to stream the transaction log
+ from a running PostgreSQL cluster. The transaction
+ log is streamed using the streaming replication protocol, and is written
+ to a local directory of files. This directory can be used as the archive
+ location for doing a restore using point-in-time recovery (see
+ the documentation on continuous archiving).
+
+
+
+ pg_receivewal streams the transaction
+ log in real time as it's being generated on the server, and does not wait
+ for segments to complete like archive_command> does.
+ For this reason, it is not necessary to set archive_timeout>
+ when using pg_receivewal.
+
+
+
+ Unlike the WAL receiver of a PostgreSQL standby server, pg_receivewal>
+ by default flushes WAL data only when a WAL file is closed.
+ The option
--synchronous> must be specified to flush WAL data
+ in real time.
+
+
+
+ The transaction log is streamed over a regular
+ PostgreSQL connection and uses the replication
+ protocol. The connection must be made with a superuser or a user
+ having REPLICATION permissions (see
+ the documentation on role attributes), and pg_hba.conf
+ must permit the replication connection. The server must also be
+ configured with max_wal_senders> set high enough to
+ leave at least one session available for the stream.
+
+
+
+ If the connection is lost, or if it cannot initially be established
+ due to a non-fatal error, pg_receivewal will
+ retry the connection indefinitely and reestablish streaming as soon
+ as possible. To avoid this behavior, use the -n
+ parameter.
+
+
+
+
+ Options
+
+
+
+
-D directory
+
--directory=directory
+
+
+ Directory to write the output to.
+
+
+ This parameter is required.
+
+
+
+
+
+
--if-not-exists
+
+
+ Do not error out when
--create-slot
is specified
+ and a slot with the specified name already exists.
+
+
+
+
+
+
-n
+
--no-loop
+
+
+ Don't loop on connection errors. Instead, exit right away with
+ an error.
+
+
+
+
+
+
-s interval
+
--status-interval=interval
+
+
+ Specifies the number of seconds between status packets sent back to the
+ server. This allows for easier monitoring of the progress from the server.
+ A value of zero disables the periodic status updates completely,
+ although an update will still be sent when requested by the server, to
+ avoid timeout disconnect. The default value is 10 seconds.
+
+
+
+
+
+
-S slotname
+
--slot=slotname
+
+
+ Require pg_receivewal to use an existing
+ replication slot (see the documentation on replication slots).
+ When this option is used, pg_receivewal> will report
+ a flush position to the server, indicating when each segment has been
+ synchronized to disk so that the server can remove that segment if it
+ is not otherwise needed.
+
+
+
+ When the replication client
+ of pg_receivewal is configured on the
+ server as a synchronous standby, using a replication slot will
+ report the flush position to the server, but only when a WAL file is
+ closed. Therefore, that configuration will cause transactions on the
+ primary to wait for a long time and effectively not work
+ satisfactorily. The option --synchronous (see
+ below) must also be specified to make this work correctly.
+
+
+
+
+
+
--synchronous
+
+
+ Flush the WAL data to disk immediately after it has been received. Also
+ send a status packet back to the server immediately after flushing,
+ regardless of --status-interval>.
+
+
+
+ This option should be specified if the replication client
+ of pg_receivewal is configured on the
+ server as a synchronous standby, to ensure that timely feedback is
+ sent to the server.
+
+
+
+
+
+
-v
+
--verbose
+
+
+ Enables verbose mode.
+
+
+
+
+
+
-Z level
+
--compress=level
+
+
+ Enables gzip compression of transaction logs, and specifies the
+ compression level (0 through 9, 0 being no compression and 9 being best
+ compression). The suffix .gz will
+ automatically be added to all filenames.
+
+
+
+
+
+
+ The following command-line options control the database connection parameters.
+
+
+
+
-d connstr
+
--dbname=connstr
+
+
+ Specifies parameters used to connect to the server, as a connection
+ string. See the libpq connection string documentation for more information.
+
+
+ The option is called --dbname> for consistency with other
+ client applications, but because pg_receivewal
+ doesn't connect to any particular database in the cluster, the database
+ name in the connection string will be ignored.
+
+
+
+
+
+
-h host
+
--host=host
+
+
+ Specifies the host name of the machine on which the server is
+ running. If the value begins with a slash, it is used as the
+ directory for the Unix domain socket. The default is taken
+ from the PGHOST environment variable, if set,
+ else a Unix domain socket connection is attempted.
+
+
+
+
+
+
-p port
+
--port=port
+
+
+ Specifies the TCP port or local Unix domain socket file
+ extension on which the server is listening for connections.
+ Defaults to the PGPORT environment variable, if
+ set, or a compiled-in default.
+
+
+
+
+
+
-U username
+
--username=username
+
+
+ User name to connect as.
+
+
+
+
+
+
-w>
+
--no-password>
+
+
+ Never issue a password prompt. If the server requires
+ password authentication and a password is not available by
+ other means such as a .pgpass file, the
+ connection attempt will fail. This option can be useful in
+ batch jobs and scripts where no user is present to enter a
+ password.
+
+
+
+
+
+
-W
+
--password
+
+
+ Force pg_receivewal to prompt for a
+ password before connecting to a database.
+
+
+
+ This option is never essential, since
+ pg_receivewal will automatically prompt
+ for a password if the server demands password authentication.
+ However, pg_receivewal will waste a
+ connection attempt finding out that the server wants a password.
+ In some cases it is worth typing
-W> to avoid the extra
+ connection attempt.
+
+
+
+
+
+
+
+ pg_receivewal can perform one of the following
+ two actions in order to control physical replication slots:
+
+
+
+
--create-slot
+
+
+ Create a new physical replication slot with the name specified in
+
--slot
, then exit.
+
+
+
+
+
+
--drop-slot
+
+
+ Drop the replication slot with the name specified in
+
--slot
, then exit.
+
+
+
+
+
+
+
+ Other options are also available:
+
+
+
+
-V>
+
--version>
+
+
+ Print the pg_receivewal version and exit.
+
+
+
+
+
+
-?>
+
--help>
+
+
+ Show help about pg_receivewal command line
+ arguments, and exit.
+
+
+
+
+
+
+
+
+
+
+ Environment
+
+
+ This utility, like most other PostgreSQL> utilities,
+ uses the environment variables supported by libpq>
+ (see the description of libpq environment variables).
+
+
+
+
+
+ Notes
+
+
+ When using pg_receivewal instead of
+ as the main WAL backup method, it is
+ strongly recommended to use replication slots. Otherwise, the server is
+ free to recycle or remove transaction log files before they are backed up,
+ because it does not have any information, either
+ from or the replication slots, about
+ how far the WAL stream has been archived. Note, however, that a
+ replication slot will fill up the server's disk space if the receiver does
+ not keep up with fetching the WAL data.
+
+
+
+
+
+ Examples
+
+
+ To stream the transaction log from the server at
+ mydbserver and store it in the local directory
+ /usr/local/pgsql/archive:
+
+$ pg_receivewal -h mydbserver -D /usr/local/pgsql/archive
+
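+
+ A sketch of the slot-based setup recommended in the Notes section (the
+ slot name archiver is hypothetical):
+
+$ pg_receivewal -h mydbserver -D /usr/local/pgsql/archive --create-slot --slot=archiver
+$ pg_receivewal -h mydbserver -D /usr/local/pgsql/archive --slot=archiver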
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/pg_receivexlog.sgml b/doc/src/sgml/ref/pg_receivexlog.sgml
deleted file mode 100644
index 8c1ea9a2e2..0000000000
--- a/doc/src/sgml/ref/pg_receivexlog.sgml
+++ /dev/null
@@ -1,399 +0,0 @@
-
-
-
-
- pg_receivexlog
-
-
-
- pg_receivexlog
- 1
- Application
-
-
-
- pg_receivexlog
- stream transaction logs from a PostgreSQL server
-
-
-
-
- pg_receivexlog
- option>
-
-
-
-
-
- Description
-
-
- pg_receivexlog is used to stream the transaction log
- from a running PostgreSQL cluster. The transaction
- log is streamed using the streaming replication protocol, and is written
- to a local directory of files. This directory can be used as the archive
- location for doing a restore using point-in-time recovery (see
- ).
-
-
-
- pg_receivexlog streams the transaction
- log in real time as it's being generated on the server, and does not wait
- for segments to complete like does.
- For this reason, it is not necessary to set
- when using
- pg_receivexlog.
-
-
-
- Unlike the WAL receiver of a PostgreSQL standby server, pg_receivexlog>
- by default flushes WAL data only when a WAL file is closed.
- The option
--synchronous> must be specified to flush WAL data
- in real time.
-
-
-
- The transaction log is streamed over a regular
- PostgreSQL connection and uses the replication
- protocol. The connection must be made with a superuser or a user
- having REPLICATION permissions (see
- ), and pg_hba.conf
- must permit the replication connection. The server must also be
- configured with set high enough to
- leave at least one session available for the stream.
-
-
-
- If the connection is lost, or if it cannot be initially established,
- with a non-fatal error, pg_receivexlog will
- retry the connection indefinitely, and reestablish streaming as soon
- as possible. To avoid this behavior, use the -n
- parameter.
-
-
-
-
- Options
-
-
-
-
-D directory
-
--directory=directory
-
-
- Directory to write the output to.
-
-
- This parameter is required.
-
-
-
-
-
-
--if-not-exists
-
-
- Do not error out when
--create-slot
is specified
- and a slot with the specified name already exists.
-
-
-
-
-
-
-n
-
--no-loop
-
-
- Don't loop on connection errors. Instead, exit right away with
- an error.
-
-
-
-
-
-
-s interval
-
--status-interval=interval
-
-
- Specifies the number of seconds between status packets sent back to the
- server. This allows for easier monitoring of the progress from server.
- A value of zero disables the periodic status updates completely,
- although an update will still be sent when requested by the server, to
- avoid timeout disconnect. The default value is 10 seconds.
-
-
-
-
-
-
-S slotname
-
--slot=slotname
-
-
- Require pg_receivexlog to use an existing
- replication slot (see ).
- When this option is used, pg_receivexlog> will report
- a flush position to the server, indicating when each segment has been
- synchronized to disk so that the server can remove that segment if it
- is not otherwise needed.
-
-
-
- When the replication client
- of pg_receivexlog is configured on the
- server as a synchronous standby, then using a replication slot will
- report the flush position to the server, but only when a WAL file is
- closed. Therefore, that configuration will cause transactions on the
- primary to wait for a long time and effectively not work
- satisfactorily. The option --synchronous (see
- below) must be specified in addition to make this work correctly.
-
-
-
-
-
-
--synchronous
-
-
- Flush the WAL data to disk immediately after it has been received. Also
- send a status packet back to the server immediately after flushing,
- regardless of --status-interval>.
-
-
-
- This option should be specified if the replication client
- of pg_receivexlog is configured on the
- server as a synchronous standby, to ensure that timely feedback is
- sent to the server.
-
-
-
-
-
-
-v
-
--verbose
-
-
- Enables verbose mode.
-
-
-
-
-
-
-Z level
-
--compress=level
-
-
- Enables gzip compression of transaction logs, and specifies the
- compression level (0 through 9, 0 being no compression and 9 being best
- compression). The suffix .gz will
- automatically be added to all filenames.
-
-
-
-
-
-
- The following command-line options control the database connection parameters.
-
-
-
-
-d connstr
-
--dbname=connstr
-
-
- Specifies parameters used to connect to the server, as a connection
- string. See for more information.
-
-
- The option is called --dbname> for consistency with other
- client applications, but because pg_receivexlog
- doesn't connect to any particular database in the cluster, database
- name in the connection string will be ignored.
-
-
-
-
-
-
-h host
-
--host=host
-
-
- Specifies the host name of the machine on which the server is
- running. If the value begins with a slash, it is used as the
- directory for the Unix domain socket. The default is taken
- from the PGHOST environment variable, if set,
- else a Unix domain socket connection is attempted.
-
-
-
-
-
-
-p port
-
--port=port
-
-
- Specifies the TCP port or local Unix domain socket file
- extension on which the server is listening for connections.
- Defaults to the PGPORT environment variable, if
- set, or a compiled-in default.
-
-
-
-
-
-
-U username
-
--username=username
-
-
- User name to connect as.
-
-
-
-
-
-
-w>
-
--no-password>
-
-
- Never issue a password prompt. If the server requires
- password authentication and a password is not available by
- other means such as a .pgpass file, the
- connection attempt will fail. This option can be useful in
- batch jobs and scripts where no user is present to enter a
- password.
-
-
-
-
-
-
-W
-
--password
-
-
- Force pg_receivexlog to prompt for a
- password before connecting to a database.
-
-
-
- This option is never essential, since
- pg_receivexlog will automatically prompt
- for a password if the server demands password authentication.
- However, pg_receivexlog will waste a
- connection attempt finding out that the server wants a password.
- In some cases it is worth typing
-W> to avoid the extra
- connection attempt.
-
-
-
-
-
-
-
- pg_receivexlog can perform one of the two
- following actions in order to control physical replication slots:
-
-
-
-
--create-slot
-
-
- Create a new physical replication slot with the name specified in
-
--slot
, then exit.
-
-
-
-
-
-
--drop-slot
-
-
- Drop the replication slot with the name specified in
-
--slot
, then exit.
-
-
-
-
-
-
-
- Other options are also available:
-
-
-
-
-V>
-
--version>
-
-
- Print the pg_receivexlog version and exit.
-
-
-
-
-
-
-?>
-
--help>
-
-
- Show help about pg_receivexlog command line
- arguments, and exit.
-
-
-
-
-
-
-
-
-
-
- Environment
-
-
- This utility, like most other PostgreSQL> utilities,
- uses the environment variables supported by libpq>
- (see ).
-
-
-
-
-
- Notes
-
-
- When using pg_receivexlog instead of
- as the main WAL backup method, it is
- strongly recommended to use replication slots. Otherwise, the server is
- free to recycle or remove transaction log files before they are backed up,
- because it does not have any information, either
- from or the replication slots, about
- how far the WAL stream has been archived. Note, however, that a
- replication slot will fill up the server's disk space if the receiver does
- not keep up with fetching the WAL data.
-
-
-
-
-
- Examples
-
-
- To stream the transaction log from the server at
- mydbserver and store it in the local directory
- /usr/local/pgsql/archive:
-
-$pg_receivexlog -h mydbserver -D /usr/local/pgsql/archive
-
-
-
-
- See Also
-
-
-
-
-
-
-
diff --git a/doc/src/sgml/ref/pg_recvlogical.sgml b/doc/src/sgml/ref/pg_recvlogical.sgml
index d066ce8701..eaea94df8b 100644
--- a/doc/src/sgml/ref/pg_recvlogical.sgml
+++ b/doc/src/sgml/ref/pg_recvlogical.sgml
@@ -35,7 +35,7 @@ PostgreSQL documentation
It creates a replication-mode connection, so it is subject to the same
- constraints as , plus those for logical
+ constraints as , plus those for logical
replication (see ).
@@ -238,7 +238,7 @@ PostgreSQL documentation
This option has the same effect as the option of the same name
- in . See the description there.
+ in . See the description there.
@@ -411,7 +411,7 @@ PostgreSQL documentation
See Also
-
+
diff --git a/doc/src/sgml/ref/pg_resetwal.sgml b/doc/src/sgml/ref/pg_resetwal.sgml
new file mode 100644
index 0000000000..0cc6fb4c4d
--- /dev/null
+++ b/doc/src/sgml/ref/pg_resetwal.sgml
@@ -0,0 +1,293 @@
+
+
+
+
+ pg_resetwal
+
+
+
+ pg_resetwal
+ 1
+ Application
+
+
+
+ pg_resetwal
+ reset the write-ahead log and other control information of a PostgreSQL database cluster
+
+
+
+
+ pg_resetwal [ -f> ] [ -n> ] [ option>... ] -D> datadir
+
+
+
+
+ Description
+
+ pg_resetwal clears the write-ahead log (WAL) and
+ optionally resets some other control information stored in the
+ pg_control> file. This function is sometimes needed
+ if these files have become corrupted. It should be used only as a
+ last resort, when the server will not start due to such corruption.
+
+
+
+ After running this command, it should be possible to start the server,
+ but bear in mind that the database might contain inconsistent data due to
+ partially-committed transactions. You should immediately dump your data,
+ run initdb>, and reload. After reload, check for
+ inconsistencies and repair as needed.
+
+
+
+ This utility can only be run by the user who installed the server, because
+ it requires read/write access to the data directory.
+ For safety reasons, you must specify the data directory on the command line.
+ pg_resetwal does not use the environment variable
+ PGDATA>.
+
+
+
+ If pg_resetwal complains that it cannot determine
+ valid data for pg_control>, you can force it to proceed anyway
+ by specifying the
-f> (force) option. In this case plausible
+ values will be substituted for the missing data. Most of the fields can be
+ expected to match, but manual assistance might be needed for the next OID,
+ next transaction ID and epoch, next multitransaction ID and offset, and
+ WAL starting address fields. These fields can be set using the options
+ discussed below. If you are not able to determine correct values for all
+ these fields,
-f> can still be used, but
+ the recovered database must be treated with even more suspicion than
+ usual: an immediate dump and reload is imperative. Do not>
+ execute any data-modifying operations in the database before you dump,
+ as any such action is likely to make the corruption worse.
+
+
+
+
+ Options
+
+
+
+
-f
+
+
+ Force pg_resetwal to proceed even if it cannot determine
+ valid data for pg_control>, as explained above.
+
+
+
+
+
+
-n
+
+
+ The
-n> (no operation) option instructs
+ pg_resetwal to print the values reconstructed from
+ pg_control> and values about to be changed, and then exit
+ without modifying anything. This is mainly a debugging tool, but can be
+ useful as a sanity check before allowing pg_resetwal
+ to proceed for real.
+
+
+
+
+
+
-V
+
--version
+ Display version information, then exit.
+
+
+
+
-?
+
--help
+ Show help, then exit.
+
+
+
+
+ The following options are only needed when
+ pg_resetwal is unable to determine appropriate values
+ by reading pg_control>. Safe values can be determined as
+ described below. For values that take numeric arguments, hexadecimal
+ values can be specified by using the prefix 0x.
+
+
+
+
+
-c
xid,xid
+
+
+ Manually set the oldest and newest transaction IDs for which the commit
+ time can be retrieved.
+
+
+
+ A safe value for the oldest transaction ID for which the commit time can
+ be retrieved (first part) can be determined by looking
+ for the numerically smallest file name in the directory
+ pg_commit_ts> under the data directory. Conversely, a safe
+ value for the newest transaction ID for which the commit time can be
+ retrieved (second part) can be determined by looking for the numerically
+ greatest file name in the same directory. The file names are in
+ hexadecimal.
+
+
+
+
+
+
-e
xid_epoch
+
+
+ Manually set the next transaction ID's epoch.
+
+
+
+ The transaction ID epoch is not actually stored anywhere in the database
+ except in the field that is set by pg_resetwal,
+ so any value will work so far as the database itself is concerned.
+ You might need to adjust this value to ensure that replication
+ systems such as Slony-I> and
+ Skytools> work correctly —
+ if so, an appropriate value should be obtainable from the state of
+ the downstream replicated database.
+
+
+
+
+
+
-l
walfile
+
+
+ Manually set the WAL starting address.
+
+
+
+ The WAL starting address should be
+ larger than any WAL segment file name currently existing in
+ the directory pg_wal> under the data directory.
+ These names are also in hexadecimal and have three parts. The first
+ part is the timeline ID> and should usually be kept the same.
+ For example, if 00000001000000320000004A> is the
+ largest entry in pg_wal>, use -l 00000001000000320000004B> or higher.
+
+
+
+
+ pg_resetwal itself looks at the files in
+ pg_wal> and chooses a default
-l> setting
+ beyond the last existing file name. Therefore, manual adjustment of
+
-l> should only be needed if you are aware of WAL segment
+ files that are not currently present in pg_wal>, such as
+ entries in an offline archive; or if the contents of
+ pg_wal> have been lost entirely.
+
+
+
+
+
+
+
-m
mxid,mxid
+
+
+ Manually set the next and oldest multitransaction ID.
+
+
+
+ A safe value for the next multitransaction ID (first part) can be
+ determined by looking for the numerically largest file name in the
+ directory pg_multixact/offsets> under the data directory,
+ adding one, and then multiplying by 65536 (0x10000). Conversely, a safe
+ value for the oldest multitransaction ID (second part of
+
-m>) can be determined by looking for the numerically smallest
+ file name in the same directory and multiplying by 65536. The file
+ names are in hexadecimal, so the easiest way to do this is to specify
+ the option value in hexadecimal and append four zeroes.
+
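+
+ As a worked example under assumed directory contents: if the largest
+ file name in pg_multixact/offsets> is 0023>
+ and the smallest is 0001>, then
+ (0x23 + 1) * 0x10000 and 0x1 * 0x10000 give:
+
+$ pg_resetwal -m 0x240000,0x10000 -D /usr/local/pgsql/data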
+
+
+
+
+
-o
oid
+
+
+ Manually set the next OID.
+
+
+
+ There is no comparably easy way to determine a next OID that's beyond
+ the largest one in the database, but fortunately it is not critical to
+ get the next-OID setting right.
+
+
+
+
+
+
-O
mxoff
+
+
+ Manually set the next multitransaction offset.
+
+
+
+ A safe value can be determined by looking for the numerically largest
+ file name in the directory pg_multixact/members> under the
+ data directory, adding one, and then multiplying by 52352 (0xCC80).
+ The file names are in hexadecimal. Unlike for the other options, there
+ is no simple recipe of appending zeroes.
+
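+
+ As a worked example under assumed directory contents: if the largest
+ file name in pg_multixact/members> is 0010>,
+ then (0x10 + 1) * 0xCC80 = 0xD9480 gives:
+
+$ pg_resetwal -O 0xD9480 -D /usr/local/pgsql/data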
+
+
+
+
+
-x
xid
+
+
+ Manually set the next transaction ID.
+
+
+
+ A safe value can be determined by looking for the numerically largest
+ file name in the directory pg_clog> under the data directory,
+ adding one,
+ and then multiplying by 1048576 (0x100000). Note that the file names are in
+ hexadecimal. It is usually easiest to specify the option value in
+ hexadecimal too. For example, if 0011> is the largest entry
+ in pg_clog>, -x 0x1200000> will work (five
+ trailing zeroes provide the proper multiplier).
+
+
+
+
+
+
+
+ Notes
+
+
+ This command must not be used when the server is
+ running. pg_resetwal will refuse to start up if
+ it finds a server lock file in the data directory. If the
+ server crashed then a lock file might have been left
+ behind; in that case you can remove the lock file to allow
+ pg_resetwal to run. But before you do
+ so, make doubly certain that there is no server process still alive.
+
+
+
+
+ See Also
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/pg_resetxlog.sgml b/doc/src/sgml/ref/pg_resetxlog.sgml
deleted file mode 100644
index c949c5e849..0000000000
--- a/doc/src/sgml/ref/pg_resetxlog.sgml
+++ /dev/null
@@ -1,293 +0,0 @@
-
-
-
-
- pg_resetxlog
-
-
-
- pg_resetxlog
- 1
- Application
-
-
-
- pg_resetxlog
- reset the write-ahead log and other control information of a PostgreSQL database cluster
-
-
-
-
- pg_resetxlog
-
-f
-
-n
- option
-
-D
datadir
-
-
-
-
- Description
-
- pg_resetxlog clears the write-ahead log (WAL) and
- optionally resets some other control information stored in the
- pg_control> file. This function is sometimes needed
- if these files have become corrupted. It should be used only as a
- last resort, when the server will not start due to such corruption.
-
-
-
- After running this command, it should be possible to start the server,
- but bear in mind that the database might contain inconsistent data due to
- partially-committed transactions. You should immediately dump your data,
- run initdb>, and reload. After reload, check for
- inconsistencies and repair as needed.
-
-
-
- This utility can only be run by the user who installed the server, because
- it requires read/write access to the data directory.
- For safety reasons, you must specify the data directory on the command line.
- pg_resetxlog does not use the environment variable
- PGDATA>.
-
-
-
- If pg_resetxlog complains that it cannot determine
- valid data for pg_control>, you can force it to proceed anyway
- by specifying the
-f> (force) option. In this case plausible
- values will be substituted for the missing data. Most of the fields can be
- expected to match, but manual assistance might be needed for the next OID,
- next transaction ID and epoch, next multitransaction ID and offset, and
- WAL starting address fields. These fields can be set using the options
- discussed below. If you are not able to determine correct values for all
- these fields,
-f> can still be used, but
- the recovered database must be treated with even more suspicion than
- usual: an immediate dump and reload is imperative. Do not>
- execute any data-modifying operations in the database before you dump,
- as any such action is likely to make the corruption worse.
-
-
-
-
- Options
-
-
-
-
-f
-
-
- Force pg_resetxlog to proceed even if it cannot determine
- valid data for pg_control>, as explained above.
-
-
-
-
-
-
-n
-
-
- The
-n> (no operation) option instructs
- pg_resetxlog to print the values reconstructed from
- pg_control> and values about to be changed, and then exit
- without modifying anything. This is mainly a debugging tool, but can be
- useful as a sanity check before allowing pg_resetxlog
- to proceed for real.
-
-
-
-
-
-
-V
-
--version
- Display version information, then exit.
-
-
-
-
-?
-
--help
- Show help, then exit.
-
-
-
-
- The following options are only needed when
- pg_resetxlog is unable to determine appropriate values
- by reading pg_control>. Safe values can be determined as
- described below. For values that take numeric arguments, hexadecimal
- values can be specified by using the prefix 0x.
-
-
-
-
-
-c
xid,xid
-
-
- Manually set the oldest and newest transaction IDs for which the commit
- time can be retrieved.
-
-
-
- A safe value for the oldest transaction ID for which the commit time can
- be retrieved (first part) can be determined by looking
- for the numerically smallest file name in the directory
- pg_commit_ts> under the data directory. Conversely, a safe
- value for the newest transaction ID for which the commit time can be
- retrieved (second part) can be determined by looking for the numerically
- greatest file name in the same directory. The file names are in
- hexadecimal.
-
-
-
-
-
-
-e
xid_epoch
-
-
- Manually set the next transaction ID's epoch.
-
-
-
- The transaction ID epoch is not actually stored anywhere in the database
- except in the field that is set by pg_resetxlog,
- so any value will work so far as the database itself is concerned.
- You might need to adjust this value to ensure that replication
- systems such as Slony-I> and
- Skytools> work correctly —
- if so, an appropriate value should be obtainable from the state of
- the downstream replicated database.
-
-
-
-
-
-
-l
xlogfile
-
-
- Manually set the WAL starting address.
-
-
-
- The WAL starting address should be
- larger than any WAL segment file name currently existing in
- the directory pg_wal> under the data directory.
- These names are also in hexadecimal and have three parts. The first
- part is the timeline ID> and should usually be kept the same.
- For example, if 00000001000000320000004A> is the
- largest entry in pg_wal>, use -l 00000001000000320000004B> or higher.
-
-
-
-
- pg_resetxlog itself looks at the files in
- pg_wal> and chooses a default
-l> setting
- beyond the last existing file name. Therefore, manual adjustment of
-
-l> should only be needed if you are aware of WAL segment
- files that are not currently present in pg_wal>, such as
- entries in an offline archive; or if the contents of
- pg_wal> have been lost entirely.
-
-
-
-
-
-
-
-m
mxid,mxid
-
-
- Manually set the next and oldest multitransaction ID.
-
-
-
- A safe value for the next multitransaction ID (first part) can be
- determined by looking for the numerically largest file name in the
- directory pg_multixact/offsets> under the data directory,
- adding one, and then multiplying by 65536 (0x10000). Conversely, a safe
- value for the oldest multitransaction ID (second part of
-
-m>) can be determined by looking for the numerically smallest
- file name in the same directory and multiplying by 65536. The file
- names are in hexadecimal, so the easiest way to do this is to specify
- the option value in hexadecimal and append four zeroes.
-
-
-
-
-
-
-o
oid
-
-
- Manually set the next OID.
-
-
-
- There is no comparably easy way to determine a next OID that's beyond
- the largest one in the database, but fortunately it is not critical to
- get the next-OID setting right.
-
-
-
-
-
-
-O
mxoff
-
-
- Manually set the next multitransaction offset.
-
-
-
- A safe value can be determined by looking for the numerically largest
- file name in the directory pg_multixact/members> under the
- data directory, adding one, and then multiplying by 52352 (0xCC80).
- The file names are in hexadecimal. There is no simple recipe such as
- the ones for other options of appending zeroes.
-
-
-
-
-
-
-x
xid
-
-
- Manually set the next transaction ID.
-
-
-
- A safe value can be determined by looking for the numerically largest
- file name in the directory pg_clog> under the data directory,
- adding one,
- and then multiplying by 1048576 (0x100000). Note that the file names are in
- hexadecimal. It is usually easiest to specify the option value in
- hexadecimal too. For example, if 0011> is the largest entry
- in pg_clog>, -x 0x1200000> will work (five
- trailing zeroes provide the proper multiplier).
-
-
-
-
-
-
-
- Notes
-
-
- This command must not be used when the server is
- running. pg_resetxlog will refuse to start up if
- it finds a server lock file in the data directory. If the
- server crashed then a lock file might have been left
- behind; in that case you can remove the lock file to allow
- pg_resetxlog to run. But before you do
- so, make doubly certain that there is no server process still alive.
-
-
-
-
- See Also
-
-
-
-
-
-
diff --git a/doc/src/sgml/ref/pg_waldump.sgml b/doc/src/sgml/ref/pg_waldump.sgml
new file mode 100644
index 0000000000..4c92eeed68
--- /dev/null
+++ b/doc/src/sgml/ref/pg_waldump.sgml
@@ -0,0 +1,237 @@
+
+
+
+
+ pg_waldump
+
+
+
+ pg_waldump
+ 1
+ Application
+
+
+
+ pg_waldump
+ display a human-readable rendering of the write-ahead log of a PostgreSQL database cluster
+
+
+
+
+ pg_waldump [ option>... ] [ startseg> [ endseg> ] ]
+
+
+
+
+
+ Description
+
+ pg_waldump displays the write-ahead log (WAL) and is mainly
+ useful for debugging or educational purposes.
+
+
+
+ This utility can only be run by the user who installed the server, because
+ it requires read-only access to the data directory.
+
+
+
+
+ Options
+
+
+ The following command-line options control the location and format of the
+ output:
+
+
+
+
+ startseg
+
+
+ Start reading at the specified log segment file. This implicitly determines
+ the path in which files will be searched for, and the timeline to use.
+
+
+
+
+
+ endseg
+
+
+ Stop after reading the specified log segment file.
+
+
+
+
+
+
-b
+
--bkp-details
+
+
+ Output detailed information about backup blocks.
+
+
+
+
+
+
-e end
+
--end=end
+
+
+ Stop reading at the specified log position, instead of reading to the
+ end of the log stream.
+
+
+
+
+
+
-f
+
--follow
+
+
+ After reaching the end of valid WAL, keep polling once per second for
+ new WAL to appear.
+
+
+
+
+
+
-n limit
+
--limit=limit
+
+
+ Display the specified number of records, then stop.
+
+
+
+
+
+
-p path
+
--path=path
+
+
+ Specifies a directory to search for log segment files or a
+ directory with a pg_wal subdirectory that
+ contains such files. The default is to search in the current
+ directory, the pg_wal subdirectory of the
+ current directory, and the pg_wal subdirectory
+ of PGDATA.
+
+
+
+
+
+
-r rmgr
+
--rmgr=rmgr
+
+
+ Only display records generated by the specified resource manager.
+ If list> is passed as the name, print a list of valid resource manager
+ names, and exit.
+
+
+
+
+
+
-s start
+
--start=start
+
+
+ Log position at which to start reading. The default is to start reading
+ the first valid log record found in the earliest file found.
+
+
+
+
+
+
-t timeline
+
--timeline=timeline
+
+
+ Timeline from which to read log records. The default is to use the
+ value in startseg>, if that is specified; otherwise, the
+ default is 1.
+
+
+
+
+
+
-V>
+
--version>
+
+
+ Print the pg_waldump version and exit.
+
+
+
+
+
+
-x xid
+
--xid=xid
+
+
+ Only display records marked with the given transaction ID.
+
+
+
+
+
+
-z
+
--stats[=record]
+
+
+ Display summary statistics (number and size of records and
+ full-page images) instead of individual records. Optionally
+ generate statistics per-record instead of per-rmgr.
+
+
+
+
+
+
-?>
+
--help>
+
+
+ Show help about pg_waldump command line
+ arguments, and exit.
+
+
+
+
+
+
+
+
+ Notes
+
+ pg_waldump> can give wrong results when the server is running.
+
+
+
+ Only the specified timeline is displayed (or the default, if none is
+ specified). Records in other timelines are ignored.
+
+
+
+ pg_waldump> cannot read WAL files with the suffix
+ .partial>. If those files need to be read, the .partial>
+ suffix must be removed from the file name.
+
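+
+ A minimal usage sketch (the segment name and directory here are
+ hypothetical), displaying the first ten records of a segment:
+
+$ pg_waldump -p /usr/local/pgsql/data/pg_wal -n 10 000000010000000000000001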
+
+
+
+ See Also
+
+
+
+
+
+
+
diff --git a/doc/src/sgml/ref/pg_xlogdump.sgml b/doc/src/sgml/ref/pg_xlogdump.sgml
deleted file mode 100644
index 078b08e2e6..0000000000
--- a/doc/src/sgml/ref/pg_xlogdump.sgml
+++ /dev/null
@@ -1,237 +0,0 @@
-
-
-
-
- pg_xlogdump
-
-
-
- pg_xlogdump
- 1
- Application
-
-
-
- pg_xlogdump
- display a human-readable rendering of the write-ahead log of a PostgreSQL database cluster
-
-
-
-
- pg_xlogdump
-
option
-
startseg
-
endseg
-
-
-
-
-
- Description
-
- pg_xlogdump displays the write-ahead log (WAL) and is mainly
- useful for debugging or educational purposes.
-
-
-
- This utility can only be run by the user who installed the server, because
- it requires read-only access to the data directory.
-
-
-
-
- Options
-
-
- The following command-line options control the location and format of the
- output:
-
-
-
-
- startseg
-
-
- Start reading at the specified log segment file. This implicitly determines
- the path in which files will be searched for, and the timeline to use.
-
-
-
-
-
- endseg
-
-
- Stop after reading the specified log segment file.
-
-
-
-
-
-
-b
-
--bkp-details
-
-
- Output detailed information about backup blocks.
-
-
-
-
-
-
-e end
-
--end=end
-
-
- Stop reading at the specified log position, instead of reading to the
- end of the log stream.
-
-
-
-
-
-
-f
-
--follow
-
-
- After reaching the end of valid WAL, keep polling once per second for
- new WAL to appear.
-
-
-
-
-
-
-n limit
-
--limit=limit
-
-
- Display the specified number of records, then stop.
-
-
-
-
-
-
-p path
-
--path=path
-
-
- Specifies a directory to search for log segment files or a
- directory with a pg_wal subdirectory that
- contains such files. The default is to search in the current
- directory, the pg_wal subdirectory of the
- current directory, and the pg_wal subdirectory
- of PGDATA.
-
-
-
-
-
-
-r rmgr
-
--rmgr=rmgr
-
-
- Only display records generated by the specified resource manager.
- If list> is passed as name, print a list of valid resource manager
- names, and exit.
-
-
-
-
-
-
-s start
-
--start=start
-
-
- Log position at which to start reading. The default is to start reading
- the first valid log record found in the earliest file found.
-
-
-
-
-
-
-t timeline
-
--timeline=timeline
-
-
- Timeline from which to read log records. The default is to use the
- value in startseg>, if that is specified; otherwise, the
- default is 1.
-
-
-
-
-
-
-V>
-
--version>
-
-
- Print the pg_xlogdump version and exit.
-
-
-
-
-
-
-x xid
-
--xid=xid
-
-
- Only display records marked with the given transaction ID.
-
-
-
-
-
-
-z
-
--stats[=record]
-
-
- Display summary statistics (number and size of records and
- full-page images) instead of individual records. Optionally
- generate statistics per-record instead of per-rmgr.
-
-
-
-
-
-
-?>
-
--help>
-
-
- Show help about pg_xlogdump command line
- arguments, and exit.
-
-
-
-
-
-
-
-
- Notes
-
- Can give wrong results when the server is running.
-
-
-
- Only the specified timeline is displayed (or the default, if none is
- specified). Records in other timelines are ignored.
-
-
-
- pg_xlogdump> cannot read WAL files with suffix
- .partial>. If those files need to be read, .partial>
- suffix needs to be removed from the file name.
-
-
-
-
- See Also
-
-
-
-
-
-
-
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index 34007d3508..c8191de9fe 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -245,7 +245,7 @@
&pgDump;
&pgDumpall;
&pgIsready;
- &pgReceivexlog;
+ &pgReceivewal;
&pgRecvlogical;
&pgRestore;
&psqlRef;
@@ -271,12 +271,12 @@
&pgarchivecleanup;
&pgControldata;
&pgCtl;
- &pgResetxlog;
+ &pgResetwal;
&pgRewind;
&pgtestfsync;
&pgtesttiming;
&pgupgrade;
- &pgxlogdump;
+ &pgwaldump;
&postgres;
&postmaster;
diff --git a/doc/src/sgml/release-9.2.sgml b/doc/src/sgml/release-9.2.sgml
index 9e99de882c..5c80517eaf 100644
--- a/doc/src/sgml/release-9.2.sgml
+++ b/doc/src/sgml/release-9.2.sgml
@@ -8069,7 +8069,7 @@ Branch: REL9_2_STABLE [6b700301c] 2015-02-17 16:03:00 +0100
Add a pg_receivexlog>
+ linkend="app-pgreceivewal">pg_receivexlog>
tool to archive WAL file changes as they are written
@@ -9231,7 +9231,7 @@ Branch: REL9_2_STABLE [6b700301c] 2015-02-17 16:03:00 +0100
Add a pg_receivexlog>
+ linkend="app-pgreceivewal">pg_receivexlog>
tool to archive WAL file changes as they are written, rather
than waiting for completed WAL files (Magnus Hagander)
diff --git a/doc/src/sgml/release-9.3.sgml b/doc/src/sgml/release-9.3.sgml
index 19bb305f0e..7f4adcd8c3 100644
--- a/doc/src/sgml/release-9.3.sgml
+++ b/doc/src/sgml/release-9.3.sgml
@@ -10080,7 +10080,7 @@ ALTER EXTENSION hstore UPDATE;
Allow tools like pg_receivexlog>
+ linkend="app-pgreceivewal">pg_receivexlog>
to run on computers with different architectures (Heikki
Linnakangas)
@@ -10110,7 +10110,7 @@ ALTER EXTENSION hstore UPDATE;
Allow pg_receivexlog>
+ linkend="app-pgreceivewal">pg_receivexlog>
and pg_basebackup>
--xlog-method> to handle streaming timeline switches
@@ -10713,7 +10713,7 @@ ALTER EXTENSION hstore UPDATE;
linkend="APP-PG-DUMPALL">pg_dumpall>, pg_basebackup>, and
pg_receivexlog>
+ linkend="app-pgreceivewal">pg_receivexlog>
to allow specifying a connection string (Amit Kapila)
@@ -11250,7 +11250,7 @@ ALTER EXTENSION hstore UPDATE;
- Add pg_xlogdump>
+ Add pg_xlogdump>
contrib program (Andres Freund)
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml
index bbf2429475..4b7e41b67f 100644
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -8917,8 +8917,9 @@ Branch: REL9_4_STABLE [c2b06ab17] 2015-01-30 22:45:58 -0500
-      Make with option -n
-      output current and potentially changed values (Rajeev Rastogi)
+      Make pg_resetxlog
+      with option -n output current and potentially changed
+      values (Rajeev Rastogi)
@@ -9518,8 +9519,9 @@ Branch: REL9_4_STABLE [c2b06ab17] 2015-01-30 22:45:58 -0500
-      Allow to report a live log stream
-      with --follow (Heikki Linnakangas)
+      Allow pg_xlogdump
+      to report a live log stream with --follow
+      (Heikki Linnakangas)
diff --git a/doc/src/sgml/release-9.5.sgml b/doc/src/sgml/release-9.5.sgml
index aab3ddc3c1..c2c653a9b3 100644
--- a/doc/src/sgml/release-9.5.sgml
+++ b/doc/src/sgml/release-9.5.sgml
@@ -6265,7 +6265,7 @@ Add GUC and storage parameter to set the maximum size of GIN pending list.
linkend="pgarchivecleanup">pg_archivecleanup>>,
pg_test_fsync>>,
pg_test_timing>>,
- and pg_xlogdump>>
+ and pg_xlogdump>>
from contrib> to src/bin> (Peter Eisentraut)
@@ -6292,7 +6292,7 @@ Add GUC and storage parameter to set the maximum size of GIN pending list.
-->
Allow pg_receivexlog>>
+ linkend="app-pgreceivewal">pg_receivexlog>>
to manage physical replication slots (Michael Paquier)
@@ -6308,7 +6308,7 @@ Add GUC and storage parameter to set the maximum size of GIN pending list.
-->
Allow pg_receivexlog>>
+ linkend="app-pgreceivewal">pg_receivexlog>>
to synchronously flush WAL> to storage using new
--synchronous> option (Furuya Osamu, Fujii Masao)
@@ -6368,7 +6368,7 @@ Add GUC and storage parameter to set the maximum size of GIN pending list.
2014-09-19 [bdd5726] Andres..: Add the capability to display summary statistic..
-->
- Add pg_xlogdump>> option
+ Add pg_xlogdump>> option
--stats> to display summary statistics (Abhijit Menon-Sen)
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index c91ca03db1..735f8c598f 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -26,7 +26,7 @@
* understand format.
*
 * These routines are in xactdesc.c because they're accessed in backend (when
- * replaying WAL) and frontend (pg_xlogdump) code. This file is the only xact
+ * replaying WAL) and frontend (pg_waldump) code. This file is the only xact
* specific one shared between both. They're complicated enough that
* duplication would be bothersome.
*/
diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c
index fc084c5bdb..42fc351f7b 100644
--- a/src/backend/access/transam/varsup.c
+++ b/src/backend/access/transam/varsup.c
@@ -399,7 +399,7 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
*
* We primarily check whether oldestXidDB is valid. The cases we have in
* mind are that that database was dropped, or the field was reset to zero
- * by pg_resetxlog. In either case we should force recalculation of the
+ * by pg_resetwal. In either case we should force recalculation of the
* wrap limit. Also do it if oldestXid is old enough to be forcing
* autovacuums or other actions; this ensures we update our state as soon
* as possible once extra overhead is being incurred.
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 6627f5498b..8b99b78249 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -647,7 +647,7 @@ XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
* always be one descriptor left open until the process ends, but never
* more than one.
*
- * XXX This is very similar to pg_xlogdump's XLogDumpXLogRead and to XLogRead
+ * XXX This is very similar to pg_waldump's XLogDumpXLogRead and to XLogRead
* in walsender.c but for small differences (such as lack of elog() in
* frontend). Probably these should be merged at some point.
*/
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 62020b6ed0..c0f28ddc09 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1473,7 +1473,7 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
/*
* We identify snapshots by the LSN they are valid for. We don't need to
* include timelines in the name as each LSN maps to exactly one timeline
- * unless the user used pg_resetxlog or similar. If a user did so, there's
+ * unless the user used pg_resetwal or similar. If a user did so, there's
* no hope continuing to decode anyway.
*/
sprintf(path, "pg_logical/snapshots/%X-%X.snap",
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 76f09fbdbf..ba506e28bc 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2435,7 +2435,7 @@ WalSndDone(WalSndSendDataCallback send_data)
/*
* To figure out whether all WAL has successfully been replicated, check
- * flush location if valid, write otherwise. Tools like pg_receivexlog
+ * flush location if valid, write otherwise. Tools like pg_receivewal
* will usually (unless in synchronous mode) return an invalid flush
* location.
*/
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index de85eca6a8..0249721204 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2639,7 +2639,7 @@ static struct config_int ConfigureNamesInt[] =
NULL
},
&autovacuum_freeze_max_age,
- /* see pg_resetxlog if you change the upper-limit value */
+ /* see pg_resetwal if you change the upper-limit value */
200000000, 100000, 2000000000,
NULL, NULL, NULL
},
diff --git a/src/bin/Makefile b/src/bin/Makefile
index e0a5d92028..bc96f37dfc 100644
--- a/src/bin/Makefile
+++ b/src/bin/Makefile
@@ -21,12 +21,12 @@ SUBDIRS = \
pg_controldata \
pg_ctl \
pg_dump \
- pg_resetxlog \
+ pg_resetwal \
pg_rewind \
pg_test_fsync \
pg_test_timing \
pg_upgrade \
- pg_xlogdump \
+ pg_waldump \
pgbench \
psql \
scripts
diff --git a/src/bin/pg_basebackup/.gitignore b/src/bin/pg_basebackup/.gitignore
index 36a2f12d61..26048bdbd8 100644
--- a/src/bin/pg_basebackup/.gitignore
+++ b/src/bin/pg_basebackup/.gitignore
@@ -1,5 +1,5 @@
/pg_basebackup
-/pg_receivexlog
+/pg_receivewal
/pg_recvlogical
/tmp_check/
diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile
index a6c307492f..f0c3be83f3 100644
--- a/src/bin/pg_basebackup/Makefile
+++ b/src/bin/pg_basebackup/Makefile
@@ -9,7 +9,7 @@
#
#-------------------------------------------------------------------------
-PGFILEDESC = "pg_basebackup/pg_receivexlog/pg_recvlogical - streaming WAL and backup receivers"
+PGFILEDESC = "pg_basebackup/pg_receivewal/pg_recvlogical - streaming WAL and backup receivers"
PGAPPICON=win32
EXTRA_INSTALL=contrib/test_decoding
@@ -23,20 +23,20 @@ LDFLAGS += -L$(top_builddir)/src/fe_utils -lpgfeutils -lpq
OBJS=receivelog.o streamutil.o walmethods.o $(WIN32RES)
-all: pg_basebackup pg_receivexlog pg_recvlogical
+all: pg_basebackup pg_receivewal pg_recvlogical
pg_basebackup: pg_basebackup.o $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
$(CC) $(CFLAGS) pg_basebackup.o $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
-pg_receivexlog: pg_receivexlog.o $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
- $(CC) $(CFLAGS) pg_receivexlog.o $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+pg_receivewal: pg_receivewal.o $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
+ $(CC) $(CFLAGS) pg_receivewal.o $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
pg_recvlogical: pg_recvlogical.o $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils
$(CC) $(CFLAGS) pg_recvlogical.o $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
install: all installdirs
$(INSTALL_PROGRAM) pg_basebackup$(X) '$(DESTDIR)$(bindir)/pg_basebackup$(X)'
- $(INSTALL_PROGRAM) pg_receivexlog$(X) '$(DESTDIR)$(bindir)/pg_receivexlog$(X)'
+ $(INSTALL_PROGRAM) pg_receivewal$(X) '$(DESTDIR)$(bindir)/pg_receivewal$(X)'
$(INSTALL_PROGRAM) pg_recvlogical$(X) '$(DESTDIR)$(bindir)/pg_recvlogical$(X)'
installdirs:
@@ -44,12 +44,12 @@ installdirs:
uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_basebackup$(X)'
- rm -f '$(DESTDIR)$(bindir)/pg_receivexlog$(X)'
+ rm -f '$(DESTDIR)$(bindir)/pg_receivewal$(X)'
rm -f '$(DESTDIR)$(bindir)/pg_recvlogical$(X)'
clean distclean maintainer-clean:
- rm -f pg_basebackup$(X) pg_receivexlog$(X) pg_recvlogical$(X) \
- pg_basebackup.o pg_receivexlog.o pg_recvlogical.o \
+ rm -f pg_basebackup$(X) pg_receivewal$(X) pg_recvlogical$(X) \
+ pg_basebackup.o pg_receivewal.o pg_recvlogical.o \
$(OBJS)
rm -rf tmp_check
diff --git a/src/bin/pg_basebackup/nls.mk b/src/bin/pg_basebackup/nls.mk
index dba43b857e..2a6de08a64 100644
--- a/src/bin/pg_basebackup/nls.mk
+++ b/src/bin/pg_basebackup/nls.mk
@@ -1,5 +1,5 @@
# src/bin/pg_basebackup/nls.mk
CATALOG_NAME = pg_basebackup
AVAIL_LANGUAGES = de es fr it ko pl pt_BR ru zh_CN
-GETTEXT_FILES = pg_basebackup.c pg_receivexlog.c pg_recvlogical.c receivelog.c streamutil.c ../../common/fe_memutils.c ../../common/file_utils.c
+GETTEXT_FILES = pg_basebackup.c pg_receivewal.c pg_recvlogical.c receivelog.c streamutil.c ../../common/fe_memutils.c ../../common/file_utils.c
GETTEXT_TRIGGERS = simple_prompt
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
new file mode 100644
index 0000000000..e8d0e8984e
--- /dev/null
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -0,0 +1,738 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_receivewal.c - receive streaming transaction log data and write it
+ * to a local file.
+ *
+ * Author: Magnus Hagander
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/bin/pg_basebackup/pg_receivewal.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "libpq-fe.h"
+#include "access/xlog_internal.h"
+#include "getopt_long.h"
+
+#include "receivelog.h"
+#include "streamutil.h"
+
+
+/* Time to sleep between reconnection attempts */
+#define RECONNECT_SLEEP_TIME 5
+
+/* Global options */
+static char *basedir = NULL;
+static int verbose = 0;
+static int compresslevel = 0;
+static int noloop = 0;
+static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
+static volatile bool time_to_abort = false;
+static bool do_create_slot = false;
+static bool slot_exists_ok = false;
+static bool do_drop_slot = false;
+static bool synchronous = false;
+static char *replication_slot = NULL;
+
+
+static void usage(void);
+static DIR *get_destination_dir(char *dest_folder);
+static void close_destination_dir(DIR *dest_dir, char *dest_folder);
+static XLogRecPtr FindStreamingStart(uint32 *tli);
+static void StreamLog(void);
+static bool stop_streaming(XLogRecPtr segendpos, uint32 timeline,
+ bool segment_finished);
+
+#define disconnect_and_exit(code) \
+ { \
+ if (conn != NULL) PQfinish(conn); \
+ exit(code); \
+ }
+
+/* Routines to evaluate segment file format */
+#define IsCompressXLogFileName(fname) \
+ (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz") && \
+ strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \
+ strcmp((fname) + XLOG_FNAME_LEN, ".gz") == 0)
+#define IsPartialCompressXLogFileName(fname) \
+ (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.partial") && \
+ strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \
+ strcmp((fname) + XLOG_FNAME_LEN, ".gz.partial") == 0)
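+
+/*
+ * For illustration (the file names are hypothetical): a finished compressed
+ * segment such as "000000010000000000000001.gz" matches
+ * IsCompressXLogFileName(), while one still being written, such as
+ * "000000010000000000000001.gz.partial", matches
+ * IsPartialCompressXLogFileName().
+ */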
+
+static void
+usage(void)
+{
+ printf(_("%s receives PostgreSQL streaming transaction logs.\n\n"),
+ progname);
+ printf(_("Usage:\n"));
+ printf(_(" %s [OPTION]...\n"), progname);
+ printf(_("\nOptions:\n"));
+ printf(_(" -D, --directory=DIR receive transaction log files into this directory\n"));
+ printf(_(" --if-not-exists do not error if slot already exists when creating a slot\n"));
+ printf(_(" -n, --no-loop do not loop on connection loss\n"));
+ printf(_(" -s, --status-interval=SECS\n"
+ " time between status packets sent to server (default: %d)\n"), (standby_message_timeout / 1000));
+ printf(_(" -S, --slot=SLOTNAME replication slot to use\n"));
+ printf(_(" --synchronous flush transaction log immediately after writing\n"));
+ printf(_(" -v, --verbose output verbose messages\n"));
+ printf(_(" -V, --version output version information, then exit\n"));
+ printf(_(" -Z, --compress=0-9 compress logs with given compression level\n"));
+ printf(_(" -?, --help show this help, then exit\n"));
+ printf(_("\nConnection options:\n"));
+ printf(_(" -d, --dbname=CONNSTR connection string\n"));
+ printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
+ printf(_(" -p, --port=PORT database server port number\n"));
+ printf(_(" -U, --username=NAME connect as specified database user\n"));
+ printf(_(" -w, --no-password never prompt for password\n"));
+ printf(_(" -W, --password force password prompt (should happen automatically)\n"));
+ printf(_("\nOptional actions:\n"));
+ printf(_(" --create-slot create a new replication slot (for the slot's name see --slot)\n"));
+ printf(_(" --drop-slot drop the replication slot (for the slot's name see --slot)\n"));
+ printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
+}
+
+static bool
+stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
+{
+ static uint32 prevtimeline = 0;
+ static XLogRecPtr prevpos = InvalidXLogRecPtr;
+
+ /* we assume that we get called once at the end of each segment */
+ if (verbose && segment_finished)
+ fprintf(stderr, _("%s: finished segment at %X/%X (timeline %u)\n"),
+ progname, (uint32) (xlogpos >> 32), (uint32) xlogpos,
+ timeline);
+
+ /*
+ * Note that we report the previous, not current, position here. After a
+ * timeline switch, xlogpos points to the beginning of the segment because
+ * that's where we always begin streaming. Reporting the end of previous
+ * timeline isn't totally accurate, because the next timeline can begin
+ * slightly before the end of the WAL that we received on the previous
+ * timeline, but it's close enough for reporting purposes.
+ */
+ if (prevtimeline != 0 && prevtimeline != timeline)
+ fprintf(stderr, _("%s: switched to timeline %u at %X/%X\n"),
+ progname, timeline,
+ (uint32) (prevpos >> 32), (uint32) prevpos);
+
+ prevtimeline = timeline;
+ prevpos = xlogpos;
+
+ if (time_to_abort)
+ {
+ fprintf(stderr, _("%s: received interrupt signal, exiting\n"),
+ progname);
+ return true;
+ }
+ return false;
+}
+
+
+/*
+ * Get destination directory.
+ */
+static DIR *
+get_destination_dir(char *dest_folder)
+{
+ DIR *dir;
+
+ Assert(dest_folder != NULL);
+ dir = opendir(dest_folder);
+ if (dir == NULL)
+ {
+ fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+ progname, dest_folder, strerror(errno));
+ disconnect_and_exit(1);
+ }
+
+ return dir;
+}
+
+
+/*
+ * Close existing directory.
+ */
+static void
+close_destination_dir(DIR *dest_dir, char *dest_folder)
+{
+ Assert(dest_dir != NULL && dest_folder != NULL);
+ if (closedir(dest_dir))
+ {
+ fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
+ progname, dest_folder, strerror(errno));
+ disconnect_and_exit(1);
+ }
+}
+
+
+/*
+ * Determine starting location for streaming, based on any existing xlog
+ * segments in the directory. We start at the end of the last one that is
+ * complete (size matches XLogSegSize), on the timeline with highest ID.
+ *
+ * If there are no WAL files in the directory, returns InvalidXLogRecPtr.
+ */
+static XLogRecPtr
+FindStreamingStart(uint32 *tli)
+{
+ DIR *dir;
+ struct dirent *dirent;
+ XLogSegNo high_segno = 0;
+ uint32 high_tli = 0;
+ bool high_ispartial = false;
+
+ dir = get_destination_dir(basedir);
+
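+	/*
+	 * Reset errno before each readdir() call so that a NULL return can be
+	 * told apart from a genuine read error, which is checked for after the
+	 * loop.
+	 */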
+ while (errno = 0, (dirent = readdir(dir)) != NULL)
+ {
+ uint32 tli;
+ XLogSegNo segno;
+ bool ispartial;
+ bool iscompress;
+
+ /*
+ * Check if the filename looks like an xlog file, or a .partial file.
+ */
+ if (IsXLogFileName(dirent->d_name))
+ {
+ ispartial = false;
+ iscompress = false;
+ }
+ else if (IsPartialXLogFileName(dirent->d_name))
+ {
+ ispartial = true;
+ iscompress = false;
+ }
+ else if (IsCompressXLogFileName(dirent->d_name))
+ {
+ ispartial = false;
+ iscompress = true;
+ }
+ else if (IsPartialCompressXLogFileName(dirent->d_name))
+ {
+ ispartial = true;
+ iscompress = true;
+ }
+ else
+ continue;
+
+ /*
+ * Looks like an xlog file. Parse its position.
+ */
+ XLogFromFileName(dirent->d_name, &tli, &segno);
+
+ /*
+ * Check that the segment has the right size, if it's supposed to be
+ * completed. For non-compressed segments just check the on-disk size
+ * and see if it matches a completed segment.
+ * For compressed segments, look at the last 4 bytes of the compressed
+ * file, which is where the uncompressed size is located for gz files
+ * with a size lower than 4GB, and then compare it to the size of a
+ * completed segment. The last 4 bytes correspond to the ISIZE member
+ * according to http://www.zlib.org/rfc-gzip.html.
+ */
+ if (!ispartial && !iscompress)
+ {
+ struct stat statbuf;
+ char fullpath[MAXPGPATH];
+
+ snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
+ if (stat(fullpath, &statbuf) != 0)
+ {
+ fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+
+ if (statbuf.st_size != XLOG_SEG_SIZE)
+ {
+ fprintf(stderr,
+ _("%s: segment file \"%s\" has incorrect size %d, skipping\n"),
+ progname, dirent->d_name, (int) statbuf.st_size);
+ continue;
+ }
+ }
+ else if (!ispartial && iscompress)
+ {
+ int fd;
+			unsigned char buf[4];
+ int bytes_out;
+ char fullpath[MAXPGPATH];
+
+ snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
+
+ fd = open(fullpath, O_RDONLY | PG_BINARY);
+ if (fd < 0)
+ {
+ fprintf(stderr, _("%s: could not open compressed file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+ if (lseek(fd, (off_t)(-4), SEEK_END) < 0)
+ {
+ fprintf(stderr, _("%s: could not seek compressed file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+ if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
+ {
+ fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
+ progname, fullpath, strerror(errno));
+ disconnect_and_exit(1);
+ }
+
+ close(fd);
+ bytes_out = (buf[3] << 24) | (buf[2] << 16) |
+ (buf[1] << 8) | buf[0];
+
+ if (bytes_out != XLOG_SEG_SIZE)
+ {
+ fprintf(stderr,
+ _("%s: compressed segment file \"%s\" has incorrect uncompressed size %d, skipping\n"),
+ progname, dirent->d_name, bytes_out);
+ continue;
+ }
+ }
+
+ /* Looks like a valid segment. Remember that we saw it. */
+ if ((segno > high_segno) ||
+ (segno == high_segno && tli > high_tli) ||
+ (segno == high_segno && tli == high_tli && high_ispartial && !ispartial))
+ {
+ high_segno = segno;
+ high_tli = tli;
+ high_ispartial = ispartial;
+ }
+ }
+
+ if (errno)
+ {
+ fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
+ progname, basedir, strerror(errno));
+ disconnect_and_exit(1);
+ }
+
+ close_destination_dir(dir, basedir);
+
+ if (high_segno > 0)
+ {
+ XLogRecPtr high_ptr;
+
+ /*
+ * Move the starting pointer to the start of the next segment, if the
+ * highest one we saw was completed. Otherwise start streaming from
+ * the beginning of the .partial segment.
+ */
+ if (!high_ispartial)
+ high_segno++;
+
+ XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr);
+
+ *tli = high_tli;
+ return high_ptr;
+ }
+ else
+ return InvalidXLogRecPtr;
+}
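+
+/*
+ * A minimal standalone sketch (not used by this program) of the gzip ISIZE
+ * check performed in FindStreamingStart() above: the last 4 bytes of a gzip
+ * file hold the size of the uncompressed input modulo 2^32, stored in
+ * little-endian byte order (RFC 1952).  Assumes <fcntl.h> and <unistd.h>
+ * are available; returns -1 on any error.
+ */
+static long long
+gzip_uncompressed_size(const char *path)
+{
+	unsigned char trailer[4];
+	int			fd;
+
+	fd = open(path, O_RDONLY | PG_BINARY);
+	if (fd < 0)
+		return -1;
+	if (lseek(fd, (off_t) -4, SEEK_END) < 0 ||
+		read(fd, trailer, sizeof(trailer)) != sizeof(trailer))
+	{
+		close(fd);
+		return -1;
+	}
+	close(fd);
+	/* assemble the little-endian ISIZE field */
+	return ((long long) trailer[3] << 24) | ((long long) trailer[2] << 16) |
+		((long long) trailer[1] << 8) | (long long) trailer[0];
+}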
+
+/*
+ * Start the log streaming
+ */
+static void
+StreamLog(void)
+{
+ XLogRecPtr serverpos;
+ TimeLineID servertli;
+ StreamCtl stream;
+
+ MemSet(&stream, 0, sizeof(stream));
+
+ /*
+ * Connect in replication mode to the server
+ */
+ if (conn == NULL)
+ conn = GetConnection();
+ if (!conn)
+ /* Error message already written in GetConnection() */
+ return;
+
+ if (!CheckServerVersionForStreaming(conn))
+ {
+ /*
+ * Error message already written in CheckServerVersionForStreaming().
+ * There's no hope of recovering from a version mismatch, so don't
+ * retry.
+ */
+ disconnect_and_exit(1);
+ }
+
+ /*
+	 * Identify the server, obtaining the start LSN position and current
+	 * timeline ID at the same time; this is necessary if no valid data can
+	 * be found in the existing output directory.
+ */
+ if (!RunIdentifySystem(conn, NULL, &servertli, &serverpos, NULL))
+ disconnect_and_exit(1);
+
+ /*
+ * Figure out where to start streaming.
+ */
+ stream.startpos = FindStreamingStart(&stream.timeline);
+ if (stream.startpos == InvalidXLogRecPtr)
+ {
+ stream.startpos = serverpos;
+ stream.timeline = servertli;
+ }
+
+ /*
+ * Always start streaming at the beginning of a segment
+ */
+ stream.startpos -= stream.startpos % XLOG_SEG_SIZE;
+
+ /*
+ * Start the replication
+ */
+ if (verbose)
+ fprintf(stderr,
+ _("%s: starting log streaming at %X/%X (timeline %u)\n"),
+ progname, (uint32) (stream.startpos >> 32), (uint32) stream.startpos,
+ stream.timeline);
+
+ stream.stream_stop = stop_streaming;
+ stream.standby_message_timeout = standby_message_timeout;
+ stream.synchronous = synchronous;
+ stream.do_sync = true;
+ stream.mark_done = false;
+ stream.walmethod = CreateWalDirectoryMethod(basedir, compresslevel,
+ stream.do_sync);
+ stream.partial_suffix = ".partial";
+ stream.replication_slot = replication_slot;
+ stream.temp_slot = false;
+
+ ReceiveXlogStream(conn, &stream);
+
+ if (!stream.walmethod->finish())
+ {
+ fprintf(stderr,
+ _("%s: could not finish writing WAL files: %s\n"),
+ progname, strerror(errno));
+ return;
+ }
+
+ PQfinish(conn);
+
+ FreeWalDirectoryMethod();
+ pg_free(stream.walmethod);
+
+ conn = NULL;
+}
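+
+/*
+ * An illustrative helper (not used by this program) showing the alignment
+ * applied in StreamLog() above: streaming always begins at a segment
+ * boundary, so the start position is rounded down to a multiple of
+ * XLOG_SEG_SIZE.  With the default 16 MB segments, for example, position
+ * 0x169E3C8 rounds down to 0x1000000.
+ */
+static XLogRecPtr
+align_to_segment_start(XLogRecPtr pos)
+{
+	return pos - (pos % XLOG_SEG_SIZE);
+}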
+
+/*
+ * When SIGINT is received, just set a flag telling the program to exit at
+ * the next possible moment.
+ */
+#ifndef WIN32
+
+static void
+sigint_handler(int signum)
+{
+ time_to_abort = true;
+}
+#endif
+
+int
+main(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"help", no_argument, NULL, '?'},
+ {"version", no_argument, NULL, 'V'},
+ {"directory", required_argument, NULL, 'D'},
+ {"dbname", required_argument, NULL, 'd'},
+ {"host", required_argument, NULL, 'h'},
+ {"port", required_argument, NULL, 'p'},
+ {"username", required_argument, NULL, 'U'},
+ {"no-loop", no_argument, NULL, 'n'},
+ {"no-password", no_argument, NULL, 'w'},
+ {"password", no_argument, NULL, 'W'},
+ {"status-interval", required_argument, NULL, 's'},
+ {"slot", required_argument, NULL, 'S'},
+ {"verbose", no_argument, NULL, 'v'},
+ {"compress", required_argument, NULL, 'Z'},
+/* action */
+ {"create-slot", no_argument, NULL, 1},
+ {"drop-slot", no_argument, NULL, 2},
+ {"if-not-exists", no_argument, NULL, 3},
+ {"synchronous", no_argument, NULL, 4},
+ {NULL, 0, NULL, 0}
+ };
+
+ int c;
+ int option_index;
+ char *db_name;
+
+ progname = get_progname(argv[0]);
+ set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_basebackup"));
+
+ if (argc > 1)
+ {
+ if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+ {
+ usage();
+ exit(0);
+ }
+ else if (strcmp(argv[1], "-V") == 0 ||
+ strcmp(argv[1], "--version") == 0)
+ {
+ puts("pg_receivewal (PostgreSQL) " PG_VERSION);
+ exit(0);
+ }
+ }
+
+ while ((c = getopt_long(argc, argv, "D:d:h:p:U:s:S:nwWvZ:",
+ long_options, &option_index)) != -1)
+ {
+ switch (c)
+ {
+ case 'D':
+ basedir = pg_strdup(optarg);
+ break;
+ case 'd':
+ connection_string = pg_strdup(optarg);
+ break;
+ case 'h':
+ dbhost = pg_strdup(optarg);
+ break;
+ case 'p':
+ if (atoi(optarg) <= 0)
+ {
+ fprintf(stderr, _("%s: invalid port number \"%s\"\n"),
+ progname, optarg);
+ exit(1);
+ }
+ dbport = pg_strdup(optarg);
+ break;
+ case 'U':
+ dbuser = pg_strdup(optarg);
+ break;
+ case 'w':
+ dbgetpassword = -1;
+ break;
+ case 'W':
+ dbgetpassword = 1;
+ break;
+ case 's':
+ standby_message_timeout = atoi(optarg) * 1000;
+ if (standby_message_timeout < 0)
+ {
+ fprintf(stderr, _("%s: invalid status interval \"%s\"\n"),
+ progname, optarg);
+ exit(1);
+ }
+ break;
+ case 'S':
+ replication_slot = pg_strdup(optarg);
+ break;
+ case 'n':
+ noloop = 1;
+ break;
+ case 'v':
+ verbose++;
+ break;
+ case 'Z':
+ compresslevel = atoi(optarg);
+ if (compresslevel < 0 || compresslevel > 9)
+ {
+ fprintf(stderr, _("%s: invalid compression level \"%s\"\n"),
+ progname, optarg);
+ exit(1);
+ }
+ break;
+/* action */
+ case 1:
+ do_create_slot = true;
+ break;
+ case 2:
+ do_drop_slot = true;
+ break;
+ case 3:
+ slot_exists_ok = true;
+ break;
+ case 4:
+ synchronous = true;
+ break;
+ default:
+
+ /*
+ * getopt_long already emitted a complaint
+ */
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+ }
+
+ /*
+ * Any non-option arguments?
+ */
+ if (optind < argc)
+ {
+ fprintf(stderr,
+ _("%s: too many command-line arguments (first is \"%s\")\n"),
+ progname, argv[optind]);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+
+ if (do_drop_slot && do_create_slot)
+ {
+ fprintf(stderr, _("%s: cannot use --create-slot together with --drop-slot\n"), progname);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+
+ if (replication_slot == NULL && (do_drop_slot || do_create_slot))
+ {
+ /* translator: second %s is an option name */
+ fprintf(stderr, _("%s: %s needs a slot to be specified using --slot\n"), progname,
+ do_drop_slot ? "--drop-slot" : "--create-slot");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+
+ /*
+ * Required arguments
+ */
+ if (basedir == NULL && !do_drop_slot && !do_create_slot)
+ {
+ fprintf(stderr, _("%s: no target directory specified\n"), progname);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+
+#ifndef HAVE_LIBZ
+ if (compresslevel != 0)
+ {
+ fprintf(stderr,
+ _("%s: this build does not support compression\n"),
+ progname);
+ exit(1);
+ }
+#endif
+
+ /*
+ * Check existence of destination folder.
+ */
+ if (!do_drop_slot && !do_create_slot)
+ {
+ DIR *dir = get_destination_dir(basedir);
+
+ close_destination_dir(dir, basedir);
+ }
+
+#ifndef WIN32
+ pqsignal(SIGINT, sigint_handler);
+#endif
+
+ /*
+ * Obtain a connection before doing anything.
+ */
+ conn = GetConnection();
+ if (!conn)
+ /* error message already written in GetConnection() */
+ exit(1);
+
+ /*
+	 * Run IDENTIFY_SYSTEM to make sure we have successfully established a
+	 * replication connection and haven't connected using a database-specific
+	 * connection.
+ */
+ if (!RunIdentifySystem(conn, NULL, NULL, NULL, &db_name))
+ disconnect_and_exit(1);
+
+ /*
+	 * Check whether there is a database associated with the connection;
+	 * none should be defined in this context.
+ */
+ if (db_name)
+ {
+ fprintf(stderr,
+ _("%s: replication connection using slot \"%s\" is unexpectedly database specific\n"),
+ progname, replication_slot);
+ disconnect_and_exit(1);
+ }
+
+ /*
+ * Drop a replication slot.
+ */
+ if (do_drop_slot)
+ {
+ if (verbose)
+ fprintf(stderr,
+ _("%s: dropping replication slot \"%s\"\n"),
+ progname, replication_slot);
+
+ if (!DropReplicationSlot(conn, replication_slot))
+ disconnect_and_exit(1);
+ disconnect_and_exit(0);
+ }
+
+ /* Create a replication slot */
+ if (do_create_slot)
+ {
+ if (verbose)
+ fprintf(stderr,
+ _("%s: creating replication slot \"%s\"\n"),
+ progname, replication_slot);
+
+ if (!CreateReplicationSlot(conn, replication_slot, NULL, true,
+ slot_exists_ok))
+ disconnect_and_exit(1);
+ disconnect_and_exit(0);
+ }
+
+ /*
+ * Don't close the connection here so that subsequent StreamLog() can
+ * reuse it.
+ */
+
+ while (true)
+ {
+ StreamLog();
+ if (time_to_abort)
+ {
+ /*
+ * We've been Ctrl-C'ed. That's not an error, so exit without an
+ * errorcode.
+ */
+ exit(0);
+ }
+ else if (noloop)
+ {
+ fprintf(stderr, _("%s: disconnected\n"), progname);
+ exit(1);
+ }
+ else
+ {
+ fprintf(stderr,
+ /* translator: check source for value for %d */
+ _("%s: disconnected; waiting %d seconds to try again\n"),
+ progname, RECONNECT_SLEEP_TIME);
+ pg_usleep(RECONNECT_SLEEP_TIME * 1000000);
+ }
+ }
+}
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
deleted file mode 100644
index 135e2070f3..0000000000
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_receivexlog.c - receive streaming transaction log data and write it
- * to a local file.
- *
- * Author: Magnus Hagander
- *
- * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- * src/bin/pg_basebackup/pg_receivexlog.c
- *-------------------------------------------------------------------------
- */
-
-#include "postgres_fe.h"
-
-#include
-#include
-#include
-#include
-#include
-
-#include "libpq-fe.h"
-#include "access/xlog_internal.h"
-#include "getopt_long.h"
-
-#include "receivelog.h"
-#include "streamutil.h"
-
-
-/* Time to sleep between reconnection attempts */
-#define RECONNECT_SLEEP_TIME 5
-
-/* Global options */
-static char *basedir = NULL;
-static int verbose = 0;
-static int compresslevel = 0;
-static int noloop = 0;
-static int standby_message_timeout = 10 * 1000; /* 10 sec = default */
-static volatile bool time_to_abort = false;
-static bool do_create_slot = false;
-static bool slot_exists_ok = false;
-static bool do_drop_slot = false;
-static bool synchronous = false;
-static char *replication_slot = NULL;
-
-
-static void usage(void);
-static DIR *get_destination_dir(char *dest_folder);
-static void close_destination_dir(DIR *dest_dir, char *dest_folder);
-static XLogRecPtr FindStreamingStart(uint32 *tli);
-static void StreamLog(void);
-static bool stop_streaming(XLogRecPtr segendpos, uint32 timeline,
- bool segment_finished);
-
-#define disconnect_and_exit(code) \
- { \
- if (conn != NULL) PQfinish(conn); \
- exit(code); \
- }
-
-/* Routines to evaluate segment file format */
-#define IsCompressXLogFileName(fname) \
- (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz") && \
- strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \
- strcmp((fname) + XLOG_FNAME_LEN, ".gz") == 0)
-#define IsPartialCompressXLogFileName(fname) \
- (strlen(fname) == XLOG_FNAME_LEN + strlen(".gz.partial") && \
- strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN && \
- strcmp((fname) + XLOG_FNAME_LEN, ".gz.partial") == 0)
-
-static void
-usage(void)
-{
- printf(_("%s receives PostgreSQL streaming transaction logs.\n\n"),
- progname);
- printf(_("Usage:\n"));
- printf(_(" %s [OPTION]...\n"), progname);
- printf(_("\nOptions:\n"));
- printf(_(" -D, --directory=DIR receive transaction log files into this directory\n"));
- printf(_(" --if-not-exists do not error if slot already exists when creating a slot\n"));
- printf(_(" -n, --no-loop do not loop on connection lost\n"));
- printf(_(" -s, --status-interval=SECS\n"
- " time between status packets sent to server (default: %d)\n"), (standby_message_timeout / 1000));
- printf(_(" -S, --slot=SLOTNAME replication slot to use\n"));
- printf(_(" --synchronous flush transaction log immediately after writing\n"));
- printf(_(" -v, --verbose output verbose messages\n"));
- printf(_(" -V, --version output version information, then exit\n"));
- printf(_(" -Z, --compress=0-9 compress logs with given compression level\n"));
- printf(_(" -?, --help show this help, then exit\n"));
- printf(_("\nConnection options:\n"));
- printf(_(" -d, --dbname=CONNSTR connection string\n"));
- printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
- printf(_(" -p, --port=PORT database server port number\n"));
- printf(_(" -U, --username=NAME connect as specified database user\n"));
- printf(_(" -w, --no-password never prompt for password\n"));
- printf(_(" -W, --password force password prompt (should happen automatically)\n"));
- printf(_("\nOptional actions:\n"));
- printf(_(" --create-slot create a new replication slot (for the slot's name see --slot)\n"));
- printf(_(" --drop-slot drop the replication slot (for the slot's name see --slot)\n"));
- printf(_("\nReport bugs to .\n"));
-}
-
-static bool
-stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
-{
- static uint32 prevtimeline = 0;
- static XLogRecPtr prevpos = InvalidXLogRecPtr;
-
- /* we assume that we get called once at the end of each segment */
- if (verbose && segment_finished)
- fprintf(stderr, _("%s: finished segment at %X/%X (timeline %u)\n"),
- progname, (uint32) (xlogpos >> 32), (uint32) xlogpos,
- timeline);
-
- /*
- * Note that we report the previous, not current, position here. After a
- * timeline switch, xlogpos points to the beginning of the segment because
- * that's where we always begin streaming. Reporting the end of previous
- * timeline isn't totally accurate, because the next timeline can begin
- * slightly before the end of the WAL that we received on the previous
- * timeline, but it's close enough for reporting purposes.
- */
- if (prevtimeline != 0 && prevtimeline != timeline)
- fprintf(stderr, _("%s: switched to timeline %u at %X/%X\n"),
- progname, timeline,
- (uint32) (prevpos >> 32), (uint32) prevpos);
-
- prevtimeline = timeline;
- prevpos = xlogpos;
-
- if (time_to_abort)
- {
- fprintf(stderr, _("%s: received interrupt signal, exiting\n"),
- progname);
- return true;
- }
- return false;
-}
-
-
-/*
- * Get destination directory.
- */
-static DIR *
-get_destination_dir(char *dest_folder)
-{
- DIR *dir;
-
- Assert(dest_folder != NULL);
- dir = opendir(dest_folder);
- if (dir == NULL)
- {
- fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
- progname, basedir, strerror(errno));
- disconnect_and_exit(1);
- }
-
- return dir;
-}
-
-
-/*
- * Close existing directory.
- */
-static void
-close_destination_dir(DIR *dest_dir, char *dest_folder)
-{
- Assert(dest_dir != NULL && dest_folder != NULL);
- if (closedir(dest_dir))
- {
- fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
- progname, dest_folder, strerror(errno));
- disconnect_and_exit(1);
- }
-}
-
-
-/*
- * Determine starting location for streaming, based on any existing xlog
- * segments in the directory. We start at the end of the last one that is
- * complete (size matches XLogSegSize), on the timeline with highest ID.
- *
- * If there are no WAL files in the directory, returns InvalidXLogRecPtr.
- */
-static XLogRecPtr
-FindStreamingStart(uint32 *tli)
-{
- DIR *dir;
- struct dirent *dirent;
- XLogSegNo high_segno = 0;
- uint32 high_tli = 0;
- bool high_ispartial = false;
-
- dir = get_destination_dir(basedir);
-
- while (errno = 0, (dirent = readdir(dir)) != NULL)
- {
- uint32 tli;
- XLogSegNo segno;
- bool ispartial;
- bool iscompress;
-
- /*
- * Check if the filename looks like an xlog file, or a .partial file.
- */
- if (IsXLogFileName(dirent->d_name))
- {
- ispartial = false;
- iscompress = false;
- }
- else if (IsPartialXLogFileName(dirent->d_name))
- {
- ispartial = true;
- iscompress = false;
- }
- else if (IsCompressXLogFileName(dirent->d_name))
- {
- ispartial = false;
- iscompress = true;
- }
- else if (IsPartialCompressXLogFileName(dirent->d_name))
- {
- ispartial = true;
- iscompress = true;
- }
- else
- continue;
-
- /*
- * Looks like an xlog file. Parse its position.
- */
- XLogFromFileName(dirent->d_name, &tli, &segno);
-
- /*
- * Check that the segment has the right size, if it's supposed to be
- * completed. For non-compressed segments just check the on-disk size
- * and see if it matches a completed segment.
- * For compressed segments, look at the last 4 bytes of the compressed
- * file, which is where the uncompressed size is located for gz files
- * with a size lower than 4GB, and then compare it to the size of a
- * completed segment. The 4 last bytes correspond to the ISIZE member
- * according to http://www.zlib.org/rfc-gzip.html.
- */
- if (!ispartial && !iscompress)
- {
- struct stat statbuf;
- char fullpath[MAXPGPATH];
-
- snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
- if (stat(fullpath, &statbuf) != 0)
- {
- fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
- progname, fullpath, strerror(errno));
- disconnect_and_exit(1);
- }
-
- if (statbuf.st_size != XLOG_SEG_SIZE)
- {
- fprintf(stderr,
- _("%s: segment file \"%s\" has incorrect size %d, skipping\n"),
- progname, dirent->d_name, (int) statbuf.st_size);
- continue;
- }
- }
- else if (!ispartial && iscompress)
- {
- int fd;
- char buf[4];
- int bytes_out;
- char fullpath[MAXPGPATH];
-
- snprintf(fullpath, sizeof(fullpath), "%s/%s", basedir, dirent->d_name);
-
- fd = open(fullpath, O_RDONLY | PG_BINARY);
- if (fd < 0)
- {
- fprintf(stderr, _("%s: could not open compressed file \"%s\": %s\n"),
- progname, fullpath, strerror(errno));
- disconnect_and_exit(1);
- }
- if (lseek(fd, (off_t)(-4), SEEK_END) < 0)
- {
- fprintf(stderr, _("%s: could not seek compressed file \"%s\": %s\n"),
- progname, fullpath, strerror(errno));
- disconnect_and_exit(1);
- }
- if (read(fd, (char *) buf, sizeof(buf)) != sizeof(buf))
- {
- fprintf(stderr, _("%s: could not read compressed file \"%s\": %s\n"),
- progname, fullpath, strerror(errno));
- disconnect_and_exit(1);
- }
-
- close(fd);
- bytes_out = (buf[3] << 24) | (buf[2] << 16) |
- (buf[1] << 8) | buf[0];
-
- if (bytes_out != XLOG_SEG_SIZE)
- {
- fprintf(stderr,
- _("%s: compressed segment file \"%s\" has incorrect uncompressed size %d, skipping\n"),
- progname, dirent->d_name, bytes_out);
- continue;
- }
- }
-
- /* Looks like a valid segment. Remember that we saw it. */
- if ((segno > high_segno) ||
- (segno == high_segno && tli > high_tli) ||
- (segno == high_segno && tli == high_tli && high_ispartial && !ispartial))
- {
- high_segno = segno;
- high_tli = tli;
- high_ispartial = ispartial;
- }
- }
-
- if (errno)
- {
- fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
- progname, basedir, strerror(errno));
- disconnect_and_exit(1);
- }
-
- close_destination_dir(dir, basedir);
-
- if (high_segno > 0)
- {
- XLogRecPtr high_ptr;
-
- /*
- * Move the starting pointer to the start of the next segment, if the
- * highest one we saw was completed. Otherwise start streaming from
- * the beginning of the .partial segment.
- */
- if (!high_ispartial)
- high_segno++;
-
- XLogSegNoOffsetToRecPtr(high_segno, 0, high_ptr);
-
- *tli = high_tli;
- return high_ptr;
- }
- else
- return InvalidXLogRecPtr;
-}
-
-/*
- * Start the log streaming
- */
-static void
-StreamLog(void)
-{
- XLogRecPtr serverpos;
- TimeLineID servertli;
- StreamCtl stream;
-
- MemSet(&stream, 0, sizeof(stream));
-
- /*
- * Connect in replication mode to the server
- */
- if (conn == NULL)
- conn = GetConnection();
- if (!conn)
- /* Error message already written in GetConnection() */
- return;
-
- if (!CheckServerVersionForStreaming(conn))
- {
- /*
- * Error message already written in CheckServerVersionForStreaming().
- * There's no hope of recovering from a version mismatch, so don't
- * retry.
- */
- disconnect_and_exit(1);
- }
-
- /*
- * Identify server, obtaining start LSN position and current timeline ID
- * at the same time, necessary if not valid data can be found in the
- * existing output directory.
- */
- if (!RunIdentifySystem(conn, NULL, &servertli, &serverpos, NULL))
- disconnect_and_exit(1);
-
- /*
- * Figure out where to start streaming.
- */
- stream.startpos = FindStreamingStart(&stream.timeline);
- if (stream.startpos == InvalidXLogRecPtr)
- {
- stream.startpos = serverpos;
- stream.timeline = servertli;
- }
-
- /*
- * Always start streaming at the beginning of a segment
- */
- stream.startpos -= stream.startpos % XLOG_SEG_SIZE;
-
- /*
- * Start the replication
- */
- if (verbose)
- fprintf(stderr,
- _("%s: starting log streaming at %X/%X (timeline %u)\n"),
- progname, (uint32) (stream.startpos >> 32), (uint32) stream.startpos,
- stream.timeline);
-
- stream.stream_stop = stop_streaming;
- stream.standby_message_timeout = standby_message_timeout;
- stream.synchronous = synchronous;
- stream.do_sync = true;
- stream.mark_done = false;
- stream.walmethod = CreateWalDirectoryMethod(basedir, compresslevel,
- stream.do_sync);
- stream.partial_suffix = ".partial";
- stream.replication_slot = replication_slot;
- stream.temp_slot = false;
-
- ReceiveXlogStream(conn, &stream);
-
- if (!stream.walmethod->finish())
- {
- fprintf(stderr,
- _("%s: could not finish writing WAL files: %s\n"),
- progname, strerror(errno));
- return;
- }
-
- PQfinish(conn);
-
- FreeWalDirectoryMethod();
- pg_free(stream.walmethod);
-
- conn = NULL;
-}
-
-/*
- * When sigint is called, just tell the system to exit at the next possible
- * moment.
- */
-#ifndef WIN32
-
-static void
-sigint_handler(int signum)
-{
- time_to_abort = true;
-}
-#endif
-
-int
-main(int argc, char **argv)
-{
- static struct option long_options[] = {
- {"help", no_argument, NULL, '?'},
- {"version", no_argument, NULL, 'V'},
- {"directory", required_argument, NULL, 'D'},
- {"dbname", required_argument, NULL, 'd'},
- {"host", required_argument, NULL, 'h'},
- {"port", required_argument, NULL, 'p'},
- {"username", required_argument, NULL, 'U'},
- {"no-loop", no_argument, NULL, 'n'},
- {"no-password", no_argument, NULL, 'w'},
- {"password", no_argument, NULL, 'W'},
- {"status-interval", required_argument, NULL, 's'},
- {"slot", required_argument, NULL, 'S'},
- {"verbose", no_argument, NULL, 'v'},
- {"compress", required_argument, NULL, 'Z'},
-/* action */
- {"create-slot", no_argument, NULL, 1},
- {"drop-slot", no_argument, NULL, 2},
- {"if-not-exists", no_argument, NULL, 3},
- {"synchronous", no_argument, NULL, 4},
- {NULL, 0, NULL, 0}
- };
-
- int c;
- int option_index;
- char *db_name;
-
- progname = get_progname(argv[0]);
- set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_basebackup"));
-
- if (argc > 1)
- {
- if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
- {
- usage();
- exit(0);
- }
- else if (strcmp(argv[1], "-V") == 0 ||
- strcmp(argv[1], "--version") == 0)
- {
- puts("pg_receivexlog (PostgreSQL) " PG_VERSION);
- exit(0);
- }
- }
-
- while ((c = getopt_long(argc, argv, "D:d:h:p:U:s:S:nwWvZ:",
- long_options, &option_index)) != -1)
- {
- switch (c)
- {
- case 'D':
- basedir = pg_strdup(optarg);
- break;
- case 'd':
- connection_string = pg_strdup(optarg);
- break;
- case 'h':
- dbhost = pg_strdup(optarg);
- break;
- case 'p':
- if (atoi(optarg) <= 0)
- {
- fprintf(stderr, _("%s: invalid port number \"%s\"\n"),
- progname, optarg);
- exit(1);
- }
- dbport = pg_strdup(optarg);
- break;
- case 'U':
- dbuser = pg_strdup(optarg);
- break;
- case 'w':
- dbgetpassword = -1;
- break;
- case 'W':
- dbgetpassword = 1;
- break;
- case 's':
- standby_message_timeout = atoi(optarg) * 1000;
- if (standby_message_timeout < 0)
- {
- fprintf(stderr, _("%s: invalid status interval \"%s\"\n"),
- progname, optarg);
- exit(1);
- }
- break;
- case 'S':
- replication_slot = pg_strdup(optarg);
- break;
- case 'n':
- noloop = 1;
- break;
- case 'v':
- verbose++;
- break;
- case 'Z':
- compresslevel = atoi(optarg);
- if (compresslevel < 0 || compresslevel > 9)
- {
- fprintf(stderr, _("%s: invalid compression level \"%s\"\n"),
- progname, optarg);
- exit(1);
- }
- break;
-/* action */
- case 1:
- do_create_slot = true;
- break;
- case 2:
- do_drop_slot = true;
- break;
- case 3:
- slot_exists_ok = true;
- break;
- case 4:
- synchronous = true;
- break;
- default:
-
- /*
- * getopt_long already emitted a complaint
- */
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
- }
-
- /*
- * Any non-option arguments?
- */
- if (optind < argc)
- {
- fprintf(stderr,
- _("%s: too many command-line arguments (first is \"%s\")\n"),
- progname, argv[optind]);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
- if (do_drop_slot && do_create_slot)
- {
- fprintf(stderr, _("%s: cannot use --create-slot together with --drop-slot\n"), progname);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
- if (replication_slot == NULL && (do_drop_slot || do_create_slot))
- {
- /* translator: second %s is an option name */
- fprintf(stderr, _("%s: %s needs a slot to be specified using --slot\n"), progname,
- do_drop_slot ? "--drop-slot" : "--create-slot");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
- /*
- * Required arguments
- */
- if (basedir == NULL && !do_drop_slot && !do_create_slot)
- {
- fprintf(stderr, _("%s: no target directory specified\n"), progname);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
-#ifndef HAVE_LIBZ
- if (compresslevel != 0)
- {
- fprintf(stderr,
- _("%s: this build does not support compression\n"),
- progname);
- exit(1);
- }
-#endif
-
- /*
- * Check existence of destination folder.
- */
- if (!do_drop_slot && !do_create_slot)
- {
- DIR *dir = get_destination_dir(basedir);
-
- close_destination_dir(dir, basedir);
- }
-
-#ifndef WIN32
- pqsignal(SIGINT, sigint_handler);
-#endif
-
- /*
- * Obtain a connection before doing anything.
- */
- conn = GetConnection();
- if (!conn)
- /* error message already written in GetConnection() */
- exit(1);
-
- /*
- * Run IDENTIFY_SYSTEM to make sure we've successfully have established a
- * replication connection and haven't connected using a database specific
- * connection.
- */
- if (!RunIdentifySystem(conn, NULL, NULL, NULL, &db_name))
- disconnect_and_exit(1);
-
- /*
- * Check that there is a database associated with connection, none should
- * be defined in this context.
- */
- if (db_name)
- {
- fprintf(stderr,
- _("%s: replication connection using slot \"%s\" is unexpectedly database specific\n"),
- progname, replication_slot);
- disconnect_and_exit(1);
- }
-
- /*
- * Drop a replication slot.
- */
- if (do_drop_slot)
- {
- if (verbose)
- fprintf(stderr,
- _("%s: dropping replication slot \"%s\"\n"),
- progname, replication_slot);
-
- if (!DropReplicationSlot(conn, replication_slot))
- disconnect_and_exit(1);
- disconnect_and_exit(0);
- }
-
- /* Create a replication slot */
- if (do_create_slot)
- {
- if (verbose)
- fprintf(stderr,
- _("%s: creating replication slot \"%s\"\n"),
- progname, replication_slot);
-
- if (!CreateReplicationSlot(conn, replication_slot, NULL, true,
- slot_exists_ok))
- disconnect_and_exit(1);
- disconnect_and_exit(0);
- }
-
- /*
- * Don't close the connection here so that subsequent StreamLog() can
- * reuse it.
- */
-
- while (true)
- {
- StreamLog();
- if (time_to_abort)
- {
- /*
- * We've been Ctrl-C'ed. That's not an error, so exit without an
- * errorcode.
- */
- exit(0);
- }
- else if (noloop)
- {
- fprintf(stderr, _("%s: disconnected\n"), progname);
- exit(1);
- }
- else
- {
- fprintf(stderr,
- /* translator: check source for value for %d */
- _("%s: disconnected; waiting %d seconds to try again\n"),
- progname, RECONNECT_SLEEP_TIME);
- pg_usleep(RECONNECT_SLEEP_TIME * 1000000);
- }
- }
-}
diff --git a/src/bin/pg_basebackup/t/020_pg_receivewal.pl b/src/bin/pg_basebackup/t/020_pg_receivewal.pl
new file mode 100644
index 0000000000..b4cb6f729d
--- /dev/null
+++ b/src/bin/pg_basebackup/t/020_pg_receivewal.pl
@@ -0,0 +1,8 @@
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 8;
+
+program_help_ok('pg_receivewal');
+program_version_ok('pg_receivewal');
+program_options_handling_ok('pg_receivewal');
diff --git a/src/bin/pg_basebackup/t/020_pg_receivexlog.pl b/src/bin/pg_basebackup/t/020_pg_receivexlog.pl
deleted file mode 100644
index c68e86d912..0000000000
--- a/src/bin/pg_basebackup/t/020_pg_receivexlog.pl
+++ /dev/null
@@ -1,8 +0,0 @@
-use strict;
-use warnings;
-use TestLib;
-use Test::More tests => 8;
-
-program_help_ok('pg_receivexlog');
-program_version_ok('pg_receivexlog');
-program_options_handling_ok('pg_receivexlog');
diff --git a/src/bin/pg_basebackup/walmethods.h b/src/bin/pg_basebackup/walmethods.h
index 2cd8b6d755..8d679dab61 100644
--- a/src/bin/pg_basebackup/walmethods.h
+++ b/src/bin/pg_basebackup/walmethods.h
@@ -39,7 +39,7 @@ struct WalWriteMethod
* - WalDirectoryMethod - write WAL to regular files in a standard pg_xlog
* - TarDirectoryMethod - write WAL to a tarfile corresponding to pg_xlog
* (only implements the methods required for pg_basebackup,
- * not all those required for pg_receivexlog)
+ * not all those required for pg_receivewal)
*/
WalWriteMethod *CreateWalDirectoryMethod(const char *basedir,
int compression, bool sync);
diff --git a/src/bin/pg_resetwal/.gitignore b/src/bin/pg_resetwal/.gitignore
new file mode 100644
index 0000000000..236abb4323
--- /dev/null
+++ b/src/bin/pg_resetwal/.gitignore
@@ -0,0 +1 @@
+/pg_resetwal
diff --git a/src/bin/pg_resetwal/Makefile b/src/bin/pg_resetwal/Makefile
new file mode 100644
index 0000000000..0f6e5da255
--- /dev/null
+++ b/src/bin/pg_resetwal/Makefile
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/bin/pg_resetwal
+#
+# Copyright (c) 1998-2017, PostgreSQL Global Development Group
+#
+# src/bin/pg_resetwal/Makefile
+#
+#-------------------------------------------------------------------------
+
+PGFILEDESC = "pg_resetwal - reset PostgreSQL WAL log"
+PGAPPICON=win32
+
+subdir = src/bin/pg_resetwal
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS= pg_resetwal.o $(WIN32RES)
+
+all: pg_resetwal
+
+pg_resetwal: $(OBJS) | submake-libpgport
+ $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
+
+install: all installdirs
+ $(INSTALL_PROGRAM) pg_resetwal$(X) '$(DESTDIR)$(bindir)/pg_resetwal$(X)'
+
+installdirs:
+ $(MKDIR_P) '$(DESTDIR)$(bindir)'
+
+uninstall:
+ rm -f '$(DESTDIR)$(bindir)/pg_resetwal$(X)'
+
+clean distclean maintainer-clean:
+ rm -f pg_resetwal$(X) $(OBJS)
diff --git a/src/bin/pg_resetwal/nls.mk b/src/bin/pg_resetwal/nls.mk
new file mode 100644
index 0000000000..d04d58ee34
--- /dev/null
+++ b/src/bin/pg_resetwal/nls.mk
@@ -0,0 +1,4 @@
+# src/bin/pg_resetwal/nls.mk
+CATALOG_NAME = pg_resetwal
+AVAIL_LANGUAGES = cs de es fr it ja ko pl pt_BR ru sv zh_CN
+GETTEXT_FILES = pg_resetwal.c ../../common/restricted_token.c
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
new file mode 100644
index 0000000000..96b7097f8b
--- /dev/null
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -0,0 +1,1184 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_resetwal.c
+ * A utility to "zero out" the xlog when it's corrupt beyond recovery.
+ * Can also rebuild pg_control if needed.
+ *
+ * The theory of operation is fairly simple:
+ * 1. Read the existing pg_control (which will include the last
+ * checkpoint record). If it is an old format then update to
+ * current format.
+ * 2. If pg_control is corrupt, attempt to intuit reasonable values
+ * by scanning the old xlog if necessary.
+ * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
+ * record at the start of xlog.
+ * 4. Flush the existing xlog files and write a new segment with
+ * just a checkpoint record in it. The new segment is positioned
+ * just past the end of the old xlog, so that existing LSNs in
+ * data pages will appear to be "in the past".
+ * This is all pretty straightforward except for the intuition part of
+ * step 2 ...
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/bin/pg_resetwal/pg_resetwal.c
+ *
+ *-------------------------------------------------------------------------
+ */
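+
+/*
+ * A note on step 4 above: LSNs must only move forward, so placing the new
+ * segment just past the end of the old xlog guarantees that any LSN already
+ * stamped on a data page compares as older than the new WAL stream; no page
+ * can appear to be newer than the log.
+ */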
+
+/*
+ * We have to use postgres.h not postgres_fe.h here, because there's so much
+ * backend-only stuff in the XLOG include files we need. But we need a
+ * frontend-ish environment otherwise. Hence this ugly hack.
+ */
+#define FRONTEND 1
+
+#include "postgres.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "access/transam.h"
+#include "access/tuptoaster.h"
+#include "access/multixact.h"
+#include "access/xlog.h"
+#include "access/xlog_internal.h"
+#include "catalog/catversion.h"
+#include "catalog/pg_control.h"
+#include "common/fe_memutils.h"
+#include "common/restricted_token.h"
+#include "storage/large_object.h"
+#include "pg_getopt.h"
+
+
+static ControlFileData ControlFile; /* pg_control values */
+static XLogSegNo newXlogSegNo; /* new XLOG segment # */
+static bool guessed = false; /* T if we had to guess at any values */
+static const char *progname;
+static uint32 set_xid_epoch = (uint32) -1;
+static TransactionId set_xid = 0;
+static TransactionId set_oldest_commit_ts_xid = 0;
+static TransactionId set_newest_commit_ts_xid = 0;
+static Oid set_oid = 0;
+static MultiXactId set_mxid = 0;
+static MultiXactOffset set_mxoff = (MultiXactOffset) -1;
+static uint32 minXlogTli = 0;
+static XLogSegNo minXlogSegNo = 0;
+
+static bool ReadControlFile(void);
+static void GuessControlValues(void);
+static void PrintControlValues(bool guessed);
+static void PrintNewControlValues(void);
+static void RewriteControlFile(void);
+static void FindEndOfXLOG(void);
+static void KillExistingXLOG(void);
+static void KillExistingArchiveStatus(void);
+static void WriteEmptyXLOG(void);
+static void usage(void);
+
+
+int
+main(int argc, char *argv[])
+{
+ int c;
+ bool force = false;
+ bool noupdate = false;
+ MultiXactId set_oldestmxid = 0;
+ char *endptr;
+ char *endptr2;
+ char *DataDir = NULL;
+ int fd;
+
+ set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal"));
+
+ progname = get_progname(argv[0]);
+
+ if (argc > 1)
+ {
+ if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
+ {
+ usage();
+ exit(0);
+ }
+ if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
+ {
+ puts("pg_resetwal (PostgreSQL) " PG_VERSION);
+ exit(0);
+ }
+ }
+
+
+ while ((c = getopt(argc, argv, "c:D:e:fl:m:no:O:x:")) != -1)
+ {
+ switch (c)
+ {
+ case 'D':
+ DataDir = optarg;
+ break;
+
+ case 'f':
+ force = true;
+ break;
+
+ case 'n':
+ noupdate = true;
+ break;
+
+ case 'e':
+ set_xid_epoch = strtoul(optarg, &endptr, 0);
+ if (endptr == optarg || *endptr != '\0')
+ {
+ /*------
+ translator: the second %s is a command line argument (-e, etc) */
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-e");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ if (set_xid_epoch == -1)
+ {
+ fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname);
+ exit(1);
+ }
+ break;
+
+ case 'x':
+ set_xid = strtoul(optarg, &endptr, 0);
+ if (endptr == optarg || *endptr != '\0')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-x");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ if (set_xid == 0)
+ {
+ fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
+ exit(1);
+ }
+ break;
+
+ case 'c':
+ set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
+ if (endptr == optarg || *endptr != ',')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
+ if (endptr2 == endptr + 1 || *endptr2 != '\0')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+
+ if (set_oldest_commit_ts_xid < 2 &&
+ set_oldest_commit_ts_xid != 0)
+ {
+ fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
+ exit(1);
+ }
+
+ if (set_newest_commit_ts_xid < 2 &&
+ set_newest_commit_ts_xid != 0)
+ {
+ fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
+ exit(1);
+ }
+ break;
+
+ case 'o':
+ set_oid = strtoul(optarg, &endptr, 0);
+ if (endptr == optarg || *endptr != '\0')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-o");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ if (set_oid == 0)
+ {
+ fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
+ exit(1);
+ }
+ break;
+
+ case 'm':
+ set_mxid = strtoul(optarg, &endptr, 0);
+ if (endptr == optarg || *endptr != ',')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+
+ set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
+ if (endptr2 == endptr + 1 || *endptr2 != '\0')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ if (set_mxid == 0)
+ {
+ fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
+ exit(1);
+ }
+
+ /*
+ * XXX It'd be nice to have more sanity checks here, e.g. so
+ * that oldest is not wrapped around w.r.t. nextMulti.
+ */
+ if (set_oldestmxid == 0)
+ {
+ fprintf(stderr, _("%s: oldest multitransaction ID (-m) must not be 0\n"),
+ progname);
+ exit(1);
+ }
+ break;
+
+ case 'O':
+ set_mxoff = strtoul(optarg, &endptr, 0);
+ if (endptr == optarg || *endptr != '\0')
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-O");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ if (set_mxoff == -1)
+ {
+ fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
+ exit(1);
+ }
+ break;
+
+ case 'l':
+ if (strspn(optarg, "01234567890ABCDEFabcdef") != XLOG_FNAME_LEN)
+ {
+ fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-l");
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo);
+ break;
+
+ default:
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+ }
+
+ if (DataDir == NULL && optind < argc)
+ DataDir = argv[optind++];
+
+ /* Complain if any arguments remain */
+ if (optind < argc)
+ {
+ fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"),
+ progname, argv[optind]);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
+ progname);
+ exit(1);
+ }
+
+ if (DataDir == NULL)
+ {
+ fprintf(stderr, _("%s: no data directory specified\n"), progname);
+ fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
+ exit(1);
+ }
+
+ /*
+ * Don't allow pg_resetwal to be run as root, to avoid overwriting the
+ * ownership of files in the data directory. We need only check for root
+ * -- any other user won't have sufficient permissions to modify files in
+ * the data directory.
+ */
+#ifndef WIN32
+ if (geteuid() == 0)
+ {
+ fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
+ progname);
+ fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
+ progname);
+ exit(1);
+ }
+#endif
+
+ get_restricted_token(progname);
+
+ if (chdir(DataDir) < 0)
+ {
+ fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
+ progname, DataDir, strerror(errno));
+ exit(1);
+ }
+
+ /*
+ * Check for a postmaster lock file --- if there is one, refuse to
+ * proceed, on grounds we might be interfering with a live installation.
+ */
+ if ((fd = open("postmaster.pid", O_RDONLY, 0)) < 0)
+ {
+ if (errno != ENOENT)
+ {
+ fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
+ progname, "postmaster.pid", strerror(errno));
+ exit(1);
+ }
+ }
+ else
+ {
+ fprintf(stderr, _("%s: lock file \"%s\" exists\n"
+ "Is a server running? If not, delete the lock file and try again.\n"),
+ progname, "postmaster.pid");
+ exit(1);
+ }
+
+ /*
+ * Attempt to read the existing pg_control file
+ */
+ if (!ReadControlFile())
+ GuessControlValues();
+
+ /*
+ * Also look at existing segment files to set up newXlogSegNo
+ */
+ FindEndOfXLOG();
+
+ /*
+ * If we're not going to proceed with the reset, print the current control
+ * file parameters.
+ */
+ if ((guessed && !force) || noupdate)
+ PrintControlValues(guessed);
+
+ /*
+ * Adjust fields if required by switches. (Do this now so that printout,
+ * if any, includes these values.)
+ */
+ if (set_xid_epoch != -1)
+ ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch;
+
+ if (set_xid != 0)
+ {
+ ControlFile.checkPointCopy.nextXid = set_xid;
+
+ /*
+ * For the moment, just set oldestXid to a value that will force
+ * immediate autovacuum-for-wraparound. It's not clear whether adding
+ * user control of this is useful, so let's just do something that's
+ * reasonably safe. The magic constant here corresponds to the
+ * maximum allowed value of autovacuum_freeze_max_age.
+ */
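+		/* (TransactionId is a 32-bit type, so this subtraction wraps
+		 * around; the adjustment below keeps the result out of the
+		 * reserved range below FirstNormalTransactionId.) */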
+ ControlFile.checkPointCopy.oldestXid = set_xid - 2000000000;
+ if (ControlFile.checkPointCopy.oldestXid < FirstNormalTransactionId)
+ ControlFile.checkPointCopy.oldestXid += FirstNormalTransactionId;
+ ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
+ }
+
+ if (set_oldest_commit_ts_xid != 0)
+ ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
+ if (set_newest_commit_ts_xid != 0)
+ ControlFile.checkPointCopy.newestCommitTsXid = set_newest_commit_ts_xid;
+
+ if (set_oid != 0)
+ ControlFile.checkPointCopy.nextOid = set_oid;
+
+ if (set_mxid != 0)
+ {
+ ControlFile.checkPointCopy.nextMulti = set_mxid;
+
+ ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
+ if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
+ ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
+ ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
+ }
+
+ if (set_mxoff != -1)
+ ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
+
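+	/* A timeline ID taken from -l can only advance the TLI recorded in
+	 * pg_control, never move it backwards. */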
+ if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
+ {
+ ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
+ ControlFile.checkPointCopy.PrevTimeLineID = minXlogTli;
+ }
+
+ if (minXlogSegNo > newXlogSegNo)
+ newXlogSegNo = minXlogSegNo;
+
+ /*
+ * If we had to guess anything, and -f was not given, just print the
+ * guessed values and exit. Also print if -n is given.
+ */
+ if ((guessed && !force) || noupdate)
+ {
+ PrintNewControlValues();
+ if (!noupdate)
+ {
+ printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
+ exit(1);
+ }
+ else
+ exit(0);
+ }
+
+ /*
+ * Don't reset from a dirty pg_control without -f, either.
+ */
+ if (ControlFile.state != DB_SHUTDOWNED && !force)
+ {
+ printf(_("The database server was not shut down cleanly.\n"
+ "Resetting the transaction log might cause data to be lost.\n"
+ "If you want to proceed anyway, use -f to force reset.\n"));
+ exit(1);
+ }
+
+ /*
+ * Else, do the dirty deed.
+ */
+ RewriteControlFile();
+ KillExistingXLOG();
+ KillExistingArchiveStatus();
+ WriteEmptyXLOG();
+
+ printf(_("Transaction log reset\n"));
+ return 0;
+}
+
+
+/*
+ * Try to read the existing pg_control file.
+ *
+ * This routine is also responsible for updating old pg_control versions
+ * to the current format. (Currently we don't do anything of the sort.)
+ */
+static bool
+ReadControlFile(void)
+{
+ int fd;
+ int len;
+ char *buffer;
+ pg_crc32c crc;
+
+ if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0)
+ {
+ /*
+ * If pg_control is not there at all, or we can't read it, the odds
+ * are we've been handed a bad DataDir path, so give up. User can do
+ * "touch pg_control" to force us to proceed.
+ */
+ fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
+ progname, XLOG_CONTROL_FILE, strerror(errno));
+ if (errno == ENOENT)
+ fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
+ " touch %s\n"
+ "and try again.\n"),
+ XLOG_CONTROL_FILE);
+ exit(1);
+ }
+
+ /* Use malloc to ensure we have a maxaligned buffer */
+ buffer = (char *) pg_malloc(PG_CONTROL_SIZE);
+
+ len = read(fd, buffer, PG_CONTROL_SIZE);
+ if (len < 0)
+ {
+ fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
+ progname, XLOG_CONTROL_FILE, strerror(errno));
+ exit(1);
+ }
+ close(fd);
+
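+	/*
+	 * Use the data only if the file is long enough and carries the
+	 * pg_control version this binary was built against; a bad CRC merely
+	 * downgrades the values to "guessed" status.
+	 */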
+ if (len >= sizeof(ControlFileData) &&
+ ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
+ {
+ /* Check the CRC. */
+ INIT_CRC32C(crc);
+ COMP_CRC32C(crc,
+ buffer,
+ offsetof(ControlFileData, crc));
+ FIN_CRC32C(crc);
+
+ if (EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc))
+ {
+ /* Valid data... */
+ memcpy(&ControlFile, buffer, sizeof(ControlFile));
+ return true;
+ }
+
+ fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
+ progname);
+ /* We will use the data anyway, but treat it as guessed. */
+ memcpy(&ControlFile, buffer, sizeof(ControlFile));
+ guessed = true;
+ return true;
+ }
+
+ /* Looks like it's a mess. */
+ fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
+ progname);
+ return false;
+}
+
+
+/*
+ * Guess at pg_control values when we can't read the old ones.
+ */
+static void
+GuessControlValues(void)
+{
+ uint64 sysidentifier;
+ struct timeval tv;
+
+ /*
+ * Set up a completely default set of pg_control values.
+ */
+ guessed = true;
+ memset(&ControlFile, 0, sizeof(ControlFile));
+
+ ControlFile.pg_control_version = PG_CONTROL_VERSION;
+ ControlFile.catalog_version_no = CATALOG_VERSION_NO;
+
+ /*
+ * Create a new unique installation identifier, since we can no longer use
+ * any old XLOG records. See notes in xlog.c about the algorithm.
+ */
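+	/* Layout: seconds of the Unix epoch in the high 32 bits, microseconds
+	 * in the next 20 bits, and the low 12 bits of the PID at the bottom. */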
+ gettimeofday(&tv, NULL);
+ sysidentifier = ((uint64) tv.tv_sec) << 32;
+ sysidentifier |= ((uint64) tv.tv_usec) << 12;
+ sysidentifier |= getpid() & 0xFFF;
+
+ ControlFile.system_identifier = sysidentifier;
+
+ ControlFile.checkPointCopy.redo = SizeOfXLogLongPHD;
+ ControlFile.checkPointCopy.ThisTimeLineID = 1;
+ ControlFile.checkPointCopy.PrevTimeLineID = 1;
+ ControlFile.checkPointCopy.fullPageWrites = false;
+ ControlFile.checkPointCopy.nextXidEpoch = 0;
+ ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId;
+ ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
+ ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
+ ControlFile.checkPointCopy.nextMultiOffset = 0;
+ ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
+ ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
+ ControlFile.checkPointCopy.oldestMulti = FirstMultiXactId;
+ ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
+ ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
+ ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
+
+ ControlFile.state = DB_SHUTDOWNED;
+ ControlFile.time = (pg_time_t) time(NULL);
+ ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
+ ControlFile.unloggedLSN = 1;
+
+ /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
+
+ ControlFile.wal_level = WAL_LEVEL_MINIMAL;
+ ControlFile.wal_log_hints = false;
+ ControlFile.track_commit_timestamp = false;
+ ControlFile.MaxConnections = 100;
+ ControlFile.max_worker_processes = 8;
+ ControlFile.max_prepared_xacts = 0;
+ ControlFile.max_locks_per_xact = 64;
+
+ ControlFile.maxAlign = MAXIMUM_ALIGNOF;
+ ControlFile.floatFormat = FLOATFORMAT_VALUE;
+ ControlFile.blcksz = BLCKSZ;
+ ControlFile.relseg_size = RELSEG_SIZE;
+ ControlFile.xlog_blcksz = XLOG_BLCKSZ;
+ ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
+ ControlFile.nameDataLen = NAMEDATALEN;
+ ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
+ ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
+ ControlFile.loblksize = LOBLKSIZE;
+#ifdef HAVE_INT64_TIMESTAMP
+ ControlFile.enableIntTimes = true;
+#else
+ ControlFile.enableIntTimes = false;
+#endif
+ ControlFile.float4ByVal = FLOAT4PASSBYVAL;
+ ControlFile.float8ByVal = FLOAT8PASSBYVAL;
+
+ /*
+ * XXX eventually, should try to grovel through old XLOG to develop more
+ * accurate values for TimeLineID, nextXID, etc.
+ */
+}
+
+
+/*
+ * Print the guessed pg_control values when we had to guess.
+ *
+ * NB: this display should be just those fields that will not be
+ * reset by RewriteControlFile().
+ */
+static void
+PrintControlValues(bool guessed)
+{
+ char sysident_str[32];
+
+ if (guessed)
+ printf(_("Guessed pg_control values:\n\n"));
+ else
+ printf(_("Current pg_control values:\n\n"));
+
+ /*
+ * Format system_identifier separately to keep platform-dependent format
+ * code out of the translatable message string.
+ */
+ snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
+ ControlFile.system_identifier);
+
+ printf(_("pg_control version number: %u\n"),
+ ControlFile.pg_control_version);
+ printf(_("Catalog version number: %u\n"),
+ ControlFile.catalog_version_no);
+ printf(_("Database system identifier: %s\n"),
+ sysident_str);
+ printf(_("Latest checkpoint's TimeLineID: %u\n"),
+ ControlFile.checkPointCopy.ThisTimeLineID);
+ printf(_("Latest checkpoint's full_page_writes: %s\n"),
+ ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
+ printf(_("Latest checkpoint's NextXID: %u:%u\n"),
+ ControlFile.checkPointCopy.nextXidEpoch,
+ ControlFile.checkPointCopy.nextXid);
+ printf(_("Latest checkpoint's NextOID: %u\n"),
+ ControlFile.checkPointCopy.nextOid);
+ printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
+ ControlFile.checkPointCopy.nextMulti);
+ printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
+ ControlFile.checkPointCopy.nextMultiOffset);
+ printf(_("Latest checkpoint's oldestXID: %u\n"),
+ ControlFile.checkPointCopy.oldestXid);
+ printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
+ ControlFile.checkPointCopy.oldestXidDB);
+ printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
+ ControlFile.checkPointCopy.oldestActiveXid);
+ printf(_("Latest checkpoint's oldestMultiXid: %u\n"),
+ ControlFile.checkPointCopy.oldestMulti);
+ printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
+ ControlFile.checkPointCopy.oldestMultiDB);
+ printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"),
+ ControlFile.checkPointCopy.oldestCommitTsXid);
+ printf(_("Latest checkpoint's newestCommitTsXid:%u\n"),
+ ControlFile.checkPointCopy.newestCommitTsXid);
+ printf(_("Maximum data alignment: %u\n"),
+ ControlFile.maxAlign);
+ /* we don't print floatFormat since can't say much useful about it */
+ printf(_("Database block size: %u\n"),
+ ControlFile.blcksz);
+ printf(_("Blocks per segment of large relation: %u\n"),
+ ControlFile.relseg_size);
+ printf(_("WAL block size: %u\n"),
+ ControlFile.xlog_blcksz);
+ printf(_("Bytes per WAL segment: %u\n"),
+ ControlFile.xlog_seg_size);
+ printf(_("Maximum length of identifiers: %u\n"),
+ ControlFile.nameDataLen);
+ printf(_("Maximum columns in an index: %u\n"),
+ ControlFile.indexMaxKeys);
+ printf(_("Maximum size of a TOAST chunk: %u\n"),
+ ControlFile.toast_max_chunk_size);
+ printf(_("Size of a large-object chunk: %u\n"),
+ ControlFile.loblksize);
+ printf(_("Date/time type storage: %s\n"),
+ (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
+ printf(_("Float4 argument passing: %s\n"),
+ (ControlFile.float4ByVal ? _("by value") : _("by reference")));
+ printf(_("Float8 argument passing: %s\n"),
+ (ControlFile.float8ByVal ? _("by value") : _("by reference")));
+ printf(_("Data page checksum version: %u\n"),
+ ControlFile.data_checksum_version);
+}
+
+
+/*
+ * Print the values to be changed.
+ */
+static void
+PrintNewControlValues(void)
+{
+ char fname[MAXFNAMELEN];
+
+	/* This is always printed, to keep the output format the same. */
+ printf(_("\n\nValues to be changed:\n\n"));
+
+ XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
+ printf(_("First log segment after reset: %s\n"), fname);
+
+ if (set_mxid != 0)
+ {
+ printf(_("NextMultiXactId: %u\n"),
+ ControlFile.checkPointCopy.nextMulti);
+ printf(_("OldestMultiXid: %u\n"),
+ ControlFile.checkPointCopy.oldestMulti);
+ printf(_("OldestMulti's DB: %u\n"),
+ ControlFile.checkPointCopy.oldestMultiDB);
+ }
+
+ if (set_mxoff != -1)
+ {
+ printf(_("NextMultiOffset: %u\n"),
+ ControlFile.checkPointCopy.nextMultiOffset);
+ }
+
+ if (set_oid != 0)
+ {
+ printf(_("NextOID: %u\n"),
+ ControlFile.checkPointCopy.nextOid);
+ }
+
+ if (set_xid != 0)
+ {
+ printf(_("NextXID: %u\n"),
+ ControlFile.checkPointCopy.nextXid);
+ printf(_("OldestXID: %u\n"),
+ ControlFile.checkPointCopy.oldestXid);
+ printf(_("OldestXID's DB: %u\n"),
+ ControlFile.checkPointCopy.oldestXidDB);
+ }
+
+ if (set_xid_epoch != -1)
+ {
+ printf(_("NextXID epoch: %u\n"),
+ ControlFile.checkPointCopy.nextXidEpoch);
+ }
+
+ if (set_oldest_commit_ts_xid != 0)
+ {
+ printf(_("oldestCommitTsXid: %u\n"),
+ ControlFile.checkPointCopy.oldestCommitTsXid);
+ }
+ if (set_newest_commit_ts_xid != 0)
+ {
+ printf(_("newestCommitTsXid: %u\n"),
+ ControlFile.checkPointCopy.newestCommitTsXid);
+ }
+}
+
+
+/*
+ * Write out the new pg_control file.
+ */
+static void
+RewriteControlFile(void)
+{
+ int fd;
+ char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
+
+ /*
+ * Adjust fields as needed to force an empty XLOG starting at
+ * newXlogSegNo.
+ */
+ XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD,
+ ControlFile.checkPointCopy.redo);
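+	/* The redo pointer now points just past the long page header of the
+	 * new first segment, which is where WriteEmptyXLOG() will place the
+	 * shutdown checkpoint record. */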
+ ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
+
+ ControlFile.state = DB_SHUTDOWNED;
+ ControlFile.time = (pg_time_t) time(NULL);
+ ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
+ ControlFile.prevCheckPoint = 0;
+ ControlFile.minRecoveryPoint = 0;
+ ControlFile.minRecoveryPointTLI = 0;
+ ControlFile.backupStartPoint = 0;
+ ControlFile.backupEndPoint = 0;
+ ControlFile.backupEndRequired = false;
+
+ /*
+ * Force the defaults for max_* settings. The values don't really matter
+ * as long as wal_level='minimal'; the postmaster will reset these fields
+ * anyway at startup.
+ */
+ ControlFile.wal_level = WAL_LEVEL_MINIMAL;
+ ControlFile.wal_log_hints = false;
+ ControlFile.track_commit_timestamp = false;
+ ControlFile.MaxConnections = 100;
+ ControlFile.max_worker_processes = 8;
+ ControlFile.max_prepared_xacts = 0;
+ ControlFile.max_locks_per_xact = 64;
+
+ /* Now we can force the recorded xlog seg size to the right thing. */
+ ControlFile.xlog_seg_size = XLogSegSize;
+
+ /* Contents are protected with a CRC */
+ INIT_CRC32C(ControlFile.crc);
+ COMP_CRC32C(ControlFile.crc,
+ (char *) &ControlFile,
+ offsetof(ControlFileData, crc));
+ FIN_CRC32C(ControlFile.crc);
+
+ /*
+ * We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
+ * excess over sizeof(ControlFileData). This reduces the odds of
+ * premature-EOF errors when reading pg_control. We'll still fail when we
+ * check the contents of the file, but hopefully with a more specific
+ * error than "couldn't read pg_control".
+ */
+ if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
+ {
+ fprintf(stderr,
+ _("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
+ progname);
+ exit(1);
+ }
+
+ memset(buffer, 0, PG_CONTROL_SIZE);
+ memcpy(buffer, &ControlFile, sizeof(ControlFileData));
+
+ unlink(XLOG_CONTROL_FILE);
+
+ fd = open(XLOG_CONTROL_FILE,
+ O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ {
+ fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
+ progname, strerror(errno));
+ exit(1);
+ }
+
+ errno = 0;
+ if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+ fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
+ progname, strerror(errno));
+ exit(1);
+ }
+
+ if (fsync(fd) != 0)
+ {
+ fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
+ exit(1);
+ }
+
+ close(fd);
+}
+
+
+/*
+ * Scan existing XLOG files and determine the highest existing WAL address
+ *
+ * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
+ * are assumed valid (note that we allow the old xlog seg size to differ
+ * from what we're using). On exit, newXlogId and newXlogSeg are set to
+ * suitable values for the beginning of replacement WAL (in our seg size).
+ */
+static void
+FindEndOfXLOG(void)
+{
+ DIR *xldir;
+ struct dirent *xlde;
+ uint64 segs_per_xlogid;
+ uint64 xlogbytepos;
+
+ /*
+ * Initialize the max() computation using the last checkpoint address from
+ * old pg_control. Note that for the moment we are working with segment
+ * numbering according to the old xlog seg size.
+ */
+ segs_per_xlogid = (UINT64CONST(0x0000000100000000) / ControlFile.xlog_seg_size);
+ newXlogSegNo = ControlFile.checkPointCopy.redo / ControlFile.xlog_seg_size;
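+	/* (The integer division truncates to the segment containing the
+	 * checkpoint's redo pointer, still counted in the old segment size.) */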
+
+ /*
+ * Scan the pg_wal directory to find existing WAL segment files. We
+ * assume any present have been used; in most scenarios this should be
+ * conservative, because of xlog.c's attempts to pre-create files.
+ */
+ xldir = opendir(XLOGDIR);
+ if (xldir == NULL)
+ {
+ fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+ progname, XLOGDIR, strerror(errno));
+ exit(1);
+ }
+
+ while (errno = 0, (xlde = readdir(xldir)) != NULL)
+ {
+ if (IsXLogFileName(xlde->d_name) ||
+ IsPartialXLogFileName(xlde->d_name))
+ {
+ unsigned int tli,
+ log,
+ seg;
+ XLogSegNo segno;
+
+ /*
+ * Note: We don't use XLogFromFileName here, because we want to
+ * use the segment size from the control file, not the size the
+ * pg_resetwal binary was compiled with
+ */
+ sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
+ segno = ((uint64) log) * segs_per_xlogid + seg;
+
+ /*
+ * Note: we take the max of all files found, regardless of their
+ * timelines. Another possibility would be to ignore files of
+ * timelines other than the target TLI, but this seems safer.
+ * Better too large a result than too small...
+ */
+ if (segno > newXlogSegNo)
+ newXlogSegNo = segno;
+ }
+ }
+
+ if (errno)
+ {
+ fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
+ progname, XLOGDIR, strerror(errno));
+ exit(1);
+ }
+
+ if (closedir(xldir))
+ {
+ fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
+ progname, XLOGDIR, strerror(errno));
+ exit(1);
+ }
+
+ /*
+ * Finally, convert to new xlog seg size, and advance by one to ensure we
+ * are in virgin territory.
+ */
+ xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
+ newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize;
+ newXlogSegNo++;
+}
+
+
+/*
+ * Remove existing XLOG files
+ */
+static void
+KillExistingXLOG(void)
+{
+ DIR *xldir;
+ struct dirent *xlde;
+ char path[MAXPGPATH];
+
+ xldir = opendir(XLOGDIR);
+ if (xldir == NULL)
+ {
+ fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+ progname, XLOGDIR, strerror(errno));
+ exit(1);
+ }
+
+ while (errno = 0, (xlde = readdir(xldir)) != NULL)
+ {
+ if (IsXLogFileName(xlde->d_name) ||
+ IsPartialXLogFileName(xlde->d_name))
+ {
+ snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
+ if (unlink(path) < 0)
+ {
+ fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
+ progname, path, strerror(errno));
+ exit(1);
+ }
+ }
+ }
+
+ if (errno)
+ {
+ fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
+ progname, XLOGDIR, strerror(errno));
+ exit(1);
+ }
+
+ if (closedir(xldir))
+ {
+ fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
+ progname, XLOGDIR, strerror(errno));
+ exit(1);
+ }
+}
+
+
+/*
+ * Remove existing archive status files
+ */
+static void
+KillExistingArchiveStatus(void)
+{
+ DIR *xldir;
+ struct dirent *xlde;
+ char path[MAXPGPATH];
+
+#define ARCHSTATDIR XLOGDIR "/archive_status"
+
+ xldir = opendir(ARCHSTATDIR);
+ if (xldir == NULL)
+ {
+ fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
+ progname, ARCHSTATDIR, strerror(errno));
+ exit(1);
+ }
+
+ while (errno = 0, (xlde = readdir(xldir)) != NULL)
+ {
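+		/* Match status files named after WAL segments: .ready, .done, and
+		 * their .partial counterparts. */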
+ if (strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_FNAME_LEN &&
+ (strcmp(xlde->d_name + XLOG_FNAME_LEN, ".ready") == 0 ||
+ strcmp(xlde->d_name + XLOG_FNAME_LEN, ".done") == 0 ||
+ strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.ready") == 0 ||
+ strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.done") == 0))
+ {
+ snprintf(path, MAXPGPATH, "%s/%s", ARCHSTATDIR, xlde->d_name);
+ if (unlink(path) < 0)
+ {
+ fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
+ progname, path, strerror(errno));
+ exit(1);
+ }
+ }
+ }
+
+ if (errno)
+ {
+ fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
+ progname, ARCHSTATDIR, strerror(errno));
+ exit(1);
+ }
+
+ if (closedir(xldir))
+ {
+ fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
+ progname, ARCHSTATDIR, strerror(errno));
+ exit(1);
+ }
+}
+
+
+/*
+ * Write an empty XLOG file, containing only the checkpoint record
+ * already set up in ControlFile.
+ */
+static void
+WriteEmptyXLOG(void)
+{
+ char *buffer;
+ XLogPageHeader page;
+ XLogLongPageHeader longpage;
+ XLogRecord *record;
+ pg_crc32c crc;
+ char path[MAXPGPATH];
+ int fd;
+ int nbytes;
+ char *recptr;
+
+ /* Use malloc() to ensure buffer is MAXALIGNED */
+ buffer = (char *) pg_malloc(XLOG_BLCKSZ);
+ page = (XLogPageHeader) buffer;
+ memset(buffer, 0, XLOG_BLCKSZ);
+
+ /* Set up the XLOG page header */
+ page->xlp_magic = XLOG_PAGE_MAGIC;
+ page->xlp_info = XLP_LONG_HEADER;
+ page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
+ page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD;
+ longpage = (XLogLongPageHeader) page;
+ longpage->xlp_sysid = ControlFile.system_identifier;
+ longpage->xlp_seg_size = XLogSegSize;
+ longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
+
+ /* Insert the initial checkpoint record */
+ recptr = (char *) page + SizeOfXLogLongPHD;
+ record = (XLogRecord *) recptr;
+ record->xl_prev = 0;
+ record->xl_xid = InvalidTransactionId;
+ record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
+ record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
+ record->xl_rmid = RM_XLOG_ID;
+
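+	/* Append the record data in short-header format: a one-byte block ID,
+	 * a one-byte length, then the CheckPoint payload itself. */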
+ recptr += SizeOfXLogRecord;
+ *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
+ *(recptr++) = sizeof(CheckPoint);
+ memcpy(recptr, &ControlFile.checkPointCopy,
+ sizeof(CheckPoint));
+
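+	/* WAL record CRCs cover the record data first, then the header up to
+	 * (but not including) the xl_crc field. */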
+ INIT_CRC32C(crc);
+ COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
+ COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
+ FIN_CRC32C(crc);
+ record->xl_crc = crc;
+
+ /* Write the first page */
+ XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
+
+ unlink(path);
+
+ fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ {
+ fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
+ progname, path, strerror(errno));
+ exit(1);
+ }
+
+ errno = 0;
+ if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+ fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
+ progname, path, strerror(errno));
+ exit(1);
+ }
+
+ /* Fill the rest of the file with zeroes */
+ memset(buffer, 0, XLOG_BLCKSZ);
+ for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
+ {
+ errno = 0;
+ if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+ {
+ if (errno == 0)
+ errno = ENOSPC;
+ fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
+ progname, path, strerror(errno));
+ exit(1);
+ }
+ }
+
+ if (fsync(fd) != 0)
+ {
+ fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
+ exit(1);
+ }
+
+ close(fd);
+}
+
+
+static void
+usage(void)
+{
+ printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
+ printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
+ printf(_("Options:\n"));
+ printf(_(" -c XID,XID set oldest and newest transactions bearing commit timestamp\n"));
+ printf(_(" (zero in either value means no change)\n"));
+ printf(_(" [-D] DATADIR data directory\n"));
+ printf(_(" -e XIDEPOCH set next transaction ID epoch\n"));
+ printf(_(" -f force update to be done\n"));
+ printf(_(" -l XLOGFILE force minimum WAL starting location for new transaction log\n"));
+ printf(_(" -m MXID,MXID set next and oldest multitransaction ID\n"));
+ printf(_(" -n no update, just show what would be done (for testing)\n"));
+ printf(_(" -o OID set next OID\n"));
+ printf(_(" -O OFFSET set next multitransaction offset\n"));
+ printf(_(" -V, --version output version information, then exit\n"));
+ printf(_(" -x XID set next transaction ID\n"));
+ printf(_(" -?, --help show this help, then exit\n"));
+	printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
+}
diff --git a/src/bin/pg_resetwal/po/cs.po b/src/bin/pg_resetwal/po/cs.po
new file mode 100644
index 0000000000..52ddcacc82
--- /dev/null
+++ b/src/bin/pg_resetwal/po/cs.po
@@ -0,0 +1,498 @@
+# Czech message translation file for pg_resetxlog
+# Copyright (C) 2012 PostgreSQL Global Development Group
+# This file is distributed under the same license as the PostgreSQL package.
+#
+# Tomas Vondra , 2012, 2013.
+msgid ""
+msgstr ""
+"Project-Id-Version: pg_resetxlog-cs (PostgreSQL 9.3)\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2013-09-23 20:18+0000\n"
+"PO-Revision-Date: 2013-12-01 20:46-0500\n"
+"Last-Translator: Tomas Vondra \n"
+"Language-Team: Czech \n"
+"Language: cs\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n"
+"X-Generator: Lokalize 1.5\n"
+
+#: pg_resetxlog.c:133
+#, c-format
+msgid "%s: invalid argument for option -e\n"
+msgstr "%s: neplatný argument pro volbu -e\n"
+
+#: pg_resetxlog.c:134 pg_resetxlog.c:149 pg_resetxlog.c:164 pg_resetxlog.c:179
+#: pg_resetxlog.c:187 pg_resetxlog.c:213 pg_resetxlog.c:227 pg_resetxlog.c:234
+#: pg_resetxlog.c:242
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Zkuste \"%s --help\" pro více informací.\n"
+
+#: pg_resetxlog.c:139
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: epocha ID transakce (-e) nesmí být -1\n"
+
+#: pg_resetxlog.c:148
+#, c-format
+msgid "%s: invalid argument for option -x\n"
+msgstr "%s: neplatný argument pro volbu -x\n"
+
+#: pg_resetxlog.c:154
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: ID transakce (-x) nesmí být 0\n"
+
+#: pg_resetxlog.c:163
+#, c-format
+msgid "%s: invalid argument for option -o\n"
+msgstr "%s: neplatný argument pro volbu -o\n"
+
+#: pg_resetxlog.c:169
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) nesmí být 0\n"
+
+#: pg_resetxlog.c:178 pg_resetxlog.c:186
+#, c-format
+msgid "%s: invalid argument for option -m\n"
+msgstr "%s: neplatný argument pro volbu -m\n"
+
+#: pg_resetxlog.c:192
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID transakce (-m) nesmí být 0\n"
+
+#: pg_resetxlog.c:202
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID nejstarší multitransakce (-m) nesmí být 0\n"
+
+#: pg_resetxlog.c:212
+#, c-format
+msgid "%s: invalid argument for option -O\n"
+msgstr "%s: neplatný argument pro volbu -O\n"
+
+#: pg_resetxlog.c:218
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: ID transakce (-O) nesmí být -1\n"
+
+#: pg_resetxlog.c:226
+#, c-format
+msgid "%s: invalid argument for option -l\n"
+msgstr "%s: neplatný argument pro volbu -l\n"
+
+#: pg_resetxlog.c:241
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: není specifikován datový adresář\n"
+
+#: pg_resetxlog.c:255
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: nemůže být spuštěn uživatelem \"root\"\n"
+
+#: pg_resetxlog.c:257
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Musíte spustit %s jako PostgreSQL superuživatel.\n"
+
+#: pg_resetxlog.c:267
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: nelze změnit adresář na \"%s\": %s\n"
+
+#: pg_resetxlog.c:280 pg_resetxlog.c:414
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: nelze otevřít soubor \"%s\" pro čtení: %s\n"
+
+#: pg_resetxlog.c:287
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: soubor se zámkem \"%s\" existuje\n"
+"Neběží již server? Jestliže ne, smažte soubor se zámkem a zkuste to znova.\n"
+
+#: pg_resetxlog.c:362
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Jestliže tyto hodnoty vypadají akceptovatelně, použijte -f pro vynucený "
+"reset.\n"
+
+#: pg_resetxlog.c:374
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Databázový server nebyl ukončen standardně.\n"
+"Resetování transakčního logu může způsobit ztrátu dat.\n"
+"Jestliže i přesto chcete pokračovat, použijte -f pro vynucený reset.\n"
+
+#: pg_resetxlog.c:388
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Transakční log resetován\n"
+
+#: pg_resetxlog.c:417
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Máte-li jistotu, že je cesta k datovému adresáři správná, proveďte\n"
+" touch %s\n"
+"a zkuste to znovu.\n"
+
+#: pg_resetxlog.c:430
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: nelze číst soubor \"%s\": %s\n"
+
+#: pg_resetxlog.c:453
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr ""
+"%s: pg_control existuje, ale s neplatným kontrolním součtem CRC; postupujte "
+"opatrně\n"
+
+#: pg_resetxlog.c:462
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr ""
+"%s: pg_control existuje, ale je poškozen nebo neznámé verze; ignoruji to\n"
+
+#: pg_resetxlog.c:561
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Odhadnuté hodnoty pg_controlu:\n"
+"\n"
+
+#: pg_resetxlog.c:563
+#, c-format
+msgid ""
+"pg_control values:\n"
+"\n"
+msgstr ""
+"Hodnoty pg_controlu:\n"
+"\n"
+
+#: pg_resetxlog.c:574
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "První log segment po resetu: %s\n"
+
+#: pg_resetxlog.c:576
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "číslo verze pg_controlu: %u\n"
+
+#: pg_resetxlog.c:578
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Číslo verze katalogu: %u\n"
+
+#: pg_resetxlog.c:580
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Identifikátor databázového systému: %s\n"
+
+#: pg_resetxlog.c:582
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineID posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:584
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "Poslední full_page_writes checkpointu: %s\n"
+
+#: pg_resetxlog.c:585
+msgid "off"
+msgstr "vypnuto"
+
+#: pg_resetxlog.c:585
+msgid "on"
+msgstr "zapnuto"
+
+#: pg_resetxlog.c:586
+#, c-format
+msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgstr "Poslední umístění NextXID checkpointu: %u/%u\n"
+
+#: pg_resetxlog.c:589
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "Poslední umístění NextOID checkpointu: %u\n"
+
+#: pg_resetxlog.c:591
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:593
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:595
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:597
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "DB k oldestXID posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:599
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:601
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:603
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "oldestMulti's DB posledního checkpointu: %u\n"
+
+#: pg_resetxlog.c:605
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Maximální zarovnání dat: %u\n"
+
+#: pg_resetxlog.c:608
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Velikost databázového bloku: %u\n"
+
+#: pg_resetxlog.c:610
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Bloků v segmentu velké relace: %u\n"
+
+#: pg_resetxlog.c:612
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Velikost WAL bloku: %u\n"
+
+#: pg_resetxlog.c:614
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Bytů ve WAL segmentu: %u\n"
+
+#: pg_resetxlog.c:616
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Maximální délka identifikátorů: %u\n"
+
+#: pg_resetxlog.c:618
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Maximální počet sloupců v indexu: %u\n"
+
+#: pg_resetxlog.c:620
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Maximální velikost úseku TOAST: %u\n"
+
+#: pg_resetxlog.c:622
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Způsob uložení typu date/time: %s\n"
+
+#: pg_resetxlog.c:623
+msgid "64-bit integers"
+msgstr "64-bitová čísla"
+
+#: pg_resetxlog.c:623
+msgid "floating-point numbers"
+msgstr "čísla s plovoucí řádovou čárkou"
+
+#: pg_resetxlog.c:624
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Způsob předávání float4 hodnot: %s\n"
+
+#: pg_resetxlog.c:625 pg_resetxlog.c:627
+msgid "by reference"
+msgstr "odkazem"
+
+#: pg_resetxlog.c:625 pg_resetxlog.c:627
+msgid "by value"
+msgstr "hodnotou"
+
+#: pg_resetxlog.c:626
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Způsob předávání float8 hodnot: %s\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+#| msgid "Catalog version number: %u\n"
+msgid "Data page checksum version: %u\n"
+msgstr "Verze kontrolních součtů datových stránek: %u\n"
+
+#: pg_resetxlog.c:690
+#, c-format
+msgid ""
+"%s: internal error -- sizeof(ControlFileData) is too large ... fix "
+"PG_CONTROL_SIZE\n"
+msgstr ""
+"%s: interní chyba -- sizeof(ControlFileData) je příliš velký ... opravte "
+"PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:705
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: nelze vytvořit pg_control soubor: %s\n"
+
+#: pg_resetxlog.c:716
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: nelze zapsat pg_control soubor: %s\n"
+
+#: pg_resetxlog.c:723 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: fsync chyba: %s\n"
+
+#: pg_resetxlog.c:763 pg_resetxlog.c:834 pg_resetxlog.c:890
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: nelze otevřít adresář \"%s\": %s\n"
+
+#: pg_resetxlog.c:805 pg_resetxlog.c:867 pg_resetxlog.c:924
+#, c-format
+msgid "%s: could not read from directory \"%s\": %s\n"
+msgstr "%s: nelze číst z adresáře \"%s\": %s\n"
+
+#: pg_resetxlog.c:848 pg_resetxlog.c:905
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: nelze smazat soubor \"%s\": %s\n"
+
+#: pg_resetxlog.c:989
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: nelze otevřít soubor \"%s\": %s\n"
+
+#: pg_resetxlog.c:1000 pg_resetxlog.c:1014
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: nelze zapsat do souboru \"%s\": %s\n"
+
+#: pg_resetxlog.c:1033
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s resetuje PostgreSQL transakční log.\n"
+"\n"
+
+#: pg_resetxlog.c:1034
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Použití:\n"
+" %s [VOLBA]... ADRESÁŘ\n"
+"\n"
+
+#: pg_resetxlog.c:1035
+#, c-format
+msgid "Options:\n"
+msgstr "Přepínače:\n"
+
+#: pg_resetxlog.c:1036
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH nastaví epochu následujícího ID transakce\n"
+
+#: pg_resetxlog.c:1037
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f vynutí provedení update\n"
+
+#: pg_resetxlog.c:1038
+#, c-format
+msgid ""
+" -l XLOGFILE force minimum WAL starting location for new transaction "
+"log\n"
+msgstr ""
+" -l XLOGFILE vynutí minimální počáteční WAL pozici pro nový transakční "
+"log\n"
+
+#: pg_resetxlog.c:1039
+#, c-format
+#| msgid " -x XID set next transaction ID\n"
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID nastav další a nejstarší ID multitransakce\n"
+
+#: pg_resetxlog.c:1040
+#, c-format
+msgid ""
+" -n no update, just show extracted control values (for "
+"testing)\n"
+msgstr ""
+" -n bez změny, jen ukáže získané kontrolní hodnoty (pro "
+"testování)\n"
+
+#: pg_resetxlog.c:1041
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID nastaví následující OID\n"
+
+#: pg_resetxlog.c:1042
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET nastaví offset následující multitransakce\n"
+
+#: pg_resetxlog.c:1043
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version ukáže informace o verzi a skončí\n"
+
+#: pg_resetxlog.c:1044
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID nastaví ID následující transakce\n"
+
+#: pg_resetxlog.c:1045
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help ukáže tuto nápovědu a skončí\n"
+
+#: pg_resetxlog.c:1046
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Chyby hlaste na adresu .\n"
+
+#~ msgid "First log file ID after reset: %u\n"
+#~ msgstr "První ID log souboru po resetu: %u\n"
diff --git a/src/bin/pg_resetwal/po/de.po b/src/bin/pg_resetwal/po/de.po
new file mode 100644
index 0000000000..fb6324d1e2
--- /dev/null
+++ b/src/bin/pg_resetwal/po/de.po
@@ -0,0 +1,605 @@
+# German message translation file for pg_resetxlog
+# Peter Eisentraut , 2002 - 2016.
+#
+# Use these quotes: »%s«
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9.6\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-04-12 20:14+0000\n"
+"PO-Revision-Date: 2016-04-12 18:28-0400\n"
+"Last-Translator: Peter Eisentraut \n"
+"Language-Team: German \n"
+"Language: de\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: WARNUNG: auf dieser Plattform können keine beschränkten Token erzeugt werden\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: konnte Prozess-Token nicht öffnen: Fehlercode %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: konnte SIDs nicht erzeugen: Fehlercode %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: konnte beschränktes Token nicht erzeugen: Fehlercode %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: konnte Prozess für Befehl »%s« nicht starten: Fehlercode %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: konnte Prozess nicht mit beschränktem Token neu starten: Fehlercode %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: konnte Statuscode des Subprozesses nicht ermitteln: Fehlercode %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: ungültiges Argument für Option %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Versuchen Sie »%s --help« für weitere Informationen.\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: Transaktions-ID-Epoche (-e) darf nicht -1 sein\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: Transaktions-ID (-x) darf nicht 0 sein\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s: Transaktions-ID (-c) muss entweder 0 oder größer oder gleich 2 sein\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) darf nicht 0 sein\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: Multitransaktions-ID (-m) darf nicht 0 sein\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: älteste Multitransaktions-ID (-m) darf nicht 0 sein\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: Multitransaktions-Offset (-O) darf nicht -1 sein\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: zu viele Kommandozeilenargumente (das erste ist »%s«)\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: kein Datenverzeichnis angegeben\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: kann nicht von »root« ausgeführt werden\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Sie müssen %s als PostgreSQL-Superuser ausführen.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: konnte nicht in Verzeichnis »%s« wechseln: %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: konnte Datei »%s« nicht zum Lesen öffnen: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: Sperrdatei »%s« existiert bereits\n"
+"Läuft der Server? Wenn nicht, dann Sperrdatei löschen und nochmal versuchen.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Wenn diese Werte akzeptabel scheinen, dann benutzen Sie -f um das\n"
+"Zurücksetzen zu erzwingen.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Der Datenbankserver wurde nicht sauber heruntergefahren.\n"
+"Beim Zurücksetzen des Transaktionslogs können Daten verloren gehen.\n"
+"Wenn Sie trotzdem weiter machen wollen, benutzen Sie -f, um das\n"
+"Zurücksetzen zu erzwingen.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Transaktionslog wurde zurück gesetzt\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Wenn Sie sicher sind, dass das Datenverzeichnis korrekt ist, führen Sie\n"
+" touch %s\n"
+"aus und versuchen Sie es erneut.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: konnte Datei »%s« nicht lesen: %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_control existiert, aber mit ungültiger CRC; mit Vorsicht fortfahren\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control existiert, aber ist kaputt oder hat unbekannte Version; wird ignoriert\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Geschätzte pg_control-Werte:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Aktuelle pg_control-Werte:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "pg_control-Versionsnummer: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Katalogversionsnummer: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Datenbanksystemidentifikation: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineID des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "full_page_writes des letzten Checkpoints: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "aus"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "an"
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "NextXID des letzten Checkpoints: %u:%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "DB der oldestXID des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "DB des oldestMulti des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "oldestCommitTsXid des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "newestCommitTsXid des letzten Checkpoints: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Maximale Datenausrichtung (Alignment): %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Datenbankblockgröße: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Blöcke pro Segment: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "WAL-Blockgröße: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Bytes pro WAL-Segment: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Maximale Bezeichnerlänge: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Maximale Spalten in einem Index: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Maximale Größe eines Stücks TOAST: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Größe eines Large-Object-Chunks: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Speicherung von Datum/Zeit-Typen: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "64-Bit-Ganzzahlen"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "Gleitkommazahlen"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Übergabe von Float4-Argumenten: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "Referenz"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "Wert"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Übergabe von Float8-Argumenten: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Datenseitenprüfsummenversion: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Zu ändernde Werte:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Erstes Logdateisegment nach Zurücksetzen: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "OldestMulti's DB: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "OldestXID's DB: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "NextXID-Epoche: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr "%s: interner Fehler -- sizeof(ControlFileData) ist zu groß ... PG_CONTROL_SIZE reparieren\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: konnte pg_control-Datei nicht erstellen: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%sL konnte pg_control-Datei nicht schreiben: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: fsync-Fehler: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: konnte Verzeichnis »%s« nicht öffnen: %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: konnte Verzeichnis »%s« nicht lesen: %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: konnte Verzeichnis »%s« nicht schließen: %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: konnte Datei »%s« nicht löschen: %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: konnte Datei »%s« nicht öffnen: %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: konnte Datei »%s« nicht schreiben: %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s setzt den PostgreSQL-Transaktionslog zurück.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Aufruf:\n"
+" %s [OPTION]... DATENVERZEICHNIS\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Optionen:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
+msgstr " -c XID,XID älteste und neuste Transaktion mit Commit-Timestamp setzen\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (Null in einem Wert bedeutet keine Änderung)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DATENVERZ Datenbankverzeichnis\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCHE nächste Transaktions-ID-Epoche setzen\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f Änderung erzwingen\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr " -l XLOGDATEI minimale WAL-Startposition für neuen Log erzwingen\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID nächste und älteste Multitransaktions-ID setzen\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr ""
+" -n keine Änderungen; nur zeigen, was gemacht werden würde (zum\n"
+" Testen)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID nächste OID setzen\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET nächsten Multitransaktions-Offset setzen\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version Versionsinformationen anzeigen, dann beenden\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID nächste Transaktions-ID setzen\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help diese Hilfe anzeigen, dann beenden\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Berichten Sie Fehler an .\n"
diff --git a/src/bin/pg_resetwal/po/es.po b/src/bin/pg_resetwal/po/es.po
new file mode 100644
index 0000000000..fec8ea973c
--- /dev/null
+++ b/src/bin/pg_resetwal/po/es.po
@@ -0,0 +1,611 @@
+# Spanish message translation file for pg_resetxlog
+#
+# Copyright (C) 2003-2012 PostgreSQL Global Development Group
+# This file is distributed under the same license as the PostgreSQL package.
+#
+# Ivan Hernandez , 2003.
+# Alvaro Herrera , 2004-2014
+# Jaime Casanova , 2005
+# Martín Marqués , 2013-2014
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: pg_resetxlog (PostgreSQL 9.6)\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-05-02 20:14+0000\n"
+"PO-Revision-Date: 2016-05-24 10:56-0500\n"
+"Last-Translator: Carlos Chapi \n"
+"Language-Team: Español \n"
+"Language: es\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: Poedit 1.8.7\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: ATENCIÓN: no se pueden crear tokens restrigidos en esta plataforma\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: no se pudo abrir el token de proceso: código de error %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: no se pudo emplazar los SIDs: código de error %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: no se pudo crear el token restringido: código de error %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: no se pudo iniciar el proceso para la orden «%s»: código de error %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: no se pudo re-ejecutar con el token restringido: código de error %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: no se pudo obtener el código de salida del subproceso»: código de error %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: argumento no válido para la opción %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Prueba con «%s --help» para más información\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: el «epoch» de ID de transacción (-e) no debe ser -1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: el ID de transacción (-x) no debe ser 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s: el ID de transacción (-c) debe ser 0 o bien mayor o igual a 2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) no debe ser cero\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: el ID de multitransacción (-m) no debe ser 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: el ID de multitransacción más antiguo (-m) no debe ser 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: la posición de multitransacción (-O) no debe ser -1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: demasiados argumentos de línea de órdenes (el primero es «%s»)\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: directorio de datos no especificado\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: no puede ser ejecutado con el usuario «root»\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Debe ejecutar %s con el superusuario de PostgreSQL.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: no se pudo cambiar al directorio «%s»: %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: no se pudo abrir el archivo «%s» para lectura: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: el archivo candado «%s» existe\n"
+"¿Hay un servidor corriendo? Si no, borre el archivo candado e inténtelo de nuevo\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Si estos valores parecen aceptables, use -f para forzar reinicio.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"El servidor de base de datos no fue terminado limpiamente.\n"
+"Reiniciar la bitácora de transacciones puede causar pérdida de datos.\n"
+"Si de todas formas quiere proceder, use -f para forzar su reinicio.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Bitácora de transacciones reiniciada\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Si está seguro que la ruta al directorio de datos es correcta, ejecute\n"
+" touch %s\n"
+"y pruebe de nuevo.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: no se pudo leer el archivo «%s»: %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: existe pg_control pero tiene un CRC no válido, proceda con precaución\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: existe pg_control pero está roto o se desconoce su versión; ignorándolo\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Valores de pg_control asumidos:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Valores actuales de pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "Número de versión de pg_control: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Número de versión de catálogo: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Identificador de sistema: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineID del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "full_page_writes del checkpoint más reciente: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "desactivado"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "activado"
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "NextXID del checkpoint más reciente: %u:%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "BD del oldestXID del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid del checkpoint más reciente: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "BD del oldestMultiXid del checkpt. más reciente: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "oldestCommitTsXid del último checkpoint: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "newestCommitTsXid del último checkpoint: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Máximo alineamiento de datos: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Tamaño del bloque de la base de datos: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Bloques por segmento de relación grande: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Tamaño del bloque de WAL: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Bytes por segmento WAL: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Longitud máxima de identificadores: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Máximo número de columnas en un índice: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Longitud máxima de un trozo TOAST: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Longitud máxima de un trozo de objeto grande: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Tipo de almacenamiento hora/fecha: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "enteros de 64 bits"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "números de coma flotante"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Paso de parámetros float4: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "por referencia"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "por valor"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Paso de parámetros float8: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Versión de suma de verificación de datos: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Valores a cambiar:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Primer segmento de log después de reiniciar: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "Base de datos del OldestMulti: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "Base de datos del OldestXID: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "Epoch del NextXID: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr "%s: error interno -- sizeof(ControlFileData) es demasiado grande ... corrija PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: no se pudo crear el archivo pg_control: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: no se pudo escribir el archivo pg_control: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: error de fsync: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: no se pudo abrir el directorio «%s»: %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: no se pudo leer el directorio «%s»: %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: no se pudo cerrar el directorio «%s»: %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: no se pudo borrar el archivo «%s»: %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: no se pudo abrir el archivo «%s»: %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: no se pudo escribir en el archivo «%s»: %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s reinicia la bitácora de transacciones de PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Uso:\n"
+" %s [OPCIÓN]... DATADIR\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Opciones:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
+msgstr ""
+" -c XID,XID asigna los ID de transacciones más antiguo y más nuevo que llevan\n"
+" timestamp de commit\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (cero en cualquiera de ellos significa no cambiar)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DATADIR directorio de datos\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH asigna el siguiente «epoch» de ID de transacción\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f fuerza que la actualización sea hecha\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr ""
+" -l XLOGFILE fuerza una posición mínima de inicio de WAL para una\n"
+" nueva transacción\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID asigna el siguiente ID de multitransacción y el más antiguo\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr " -n no actualiza, sólo muestra lo que va a hacer (para pruebas)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID asigna el siguiente OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET asigna la siguiente posición de multitransacción\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version muestra información de la versión, luego sale\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID asigna el siguiente ID de transacción\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help muestra esta ayuda, luego sale\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Reporte errores a .\n"
diff --git a/src/bin/pg_resetwal/po/fr.po b/src/bin/pg_resetwal/po/fr.po
new file mode 100644
index 0000000000..1a9a57f77f
--- /dev/null
+++ b/src/bin/pg_resetwal/po/fr.po
@@ -0,0 +1,644 @@
+# translation of pg_resetxlog.po to fr_fr
+# french message translation file for pg_resetxlog
+#
+# Use these quotes: « %s »
+#
+# Guillaume Lelarge , 2003-2009.
+# Stéphane Schildknecht , 2009.
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9.6\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-05-08 21:14+0000\n"
+"PO-Revision-Date: 2016-05-09 10:28+0200\n"
+"Last-Translator: Guillaume Lelarge \n"
+"Language-Team: PostgreSQLfr \n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-15\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: Poedit 1.8.7.1\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s : ATTENTION : ne peut pas cr�r les jetons restreints sur cette plateforme\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s : n'a pas pu ouvrir le jeton du processus : code d'erreur %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s : n'a pas pu allouer les SID : code d'erreur %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s : n'a pas pu cr�er le jeton restreint : code d'erreur %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s : n'a pas pu d�marrer le processus pour la commande � %s � : code d'erreur %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s : n'a pas pu r�-ex�cuter le jeton restreint : code d'erreur %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s : n'a pas pu r�cup�rer le code de statut du sous-processus : code d'erreur %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s : argument invalide pour l'option %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Essayer � %s --help � pour plus d'informations.\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr ""
+"%s : la valeur epoch de l'identifiant de transaction (-e) ne doit pas �tre\n"
+"-1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s : l'identifiant de la transaction (-x) ne doit pas �tre 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s : l'identifiant de transaction (-c) doit �tre 0 ou sup�rieur ou �gal � 2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s : l'OID (-o) ne doit pas �tre 0\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s : l'identifiant de multi-transaction (-m) ne doit pas �tre 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s : l'identifiant de multi-transaction le plus ancien (-m) ne doit pas �tre 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s : le d�calage de multi-transaction (-O) ne doit pas �tre -1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s : trop d'arguments en ligne de commande (le premier �tant � %s �)\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s : aucun r�pertoire de donn�es indiqu�\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s : ne peut pas �tre ex�cut� par � root �\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Vous devez ex�cuter %s en tant que super-utilisateur PostgreSQL.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s : n'a pas pu acc�der au r�pertoire � %s � : %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s : n'a pas pu ouvrir le fichier � %s � en lecture : %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s : le verrou � %s � existe\n"
+"Le serveur est-il d�marr� ? Sinon, supprimer le fichier verrou et r�essayer.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Si ces valeurs semblent acceptables, utiliser -f pour forcer la\n"
+"r�initialisation.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Le serveur de bases de donn�es n'a pas �t� arr�t� proprement.\n"
+"R�-initialiser le journal des transactions peut occasionner des pertes de\n"
+"donn�es.\n"
+"Pour continuer malgr� tout, utiliser -f pour forcer la\n"
+"r�initialisation.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "R�initialisation du journal des transactions\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Si le chemin du r�pertoire de donn�es est correct, ex�cuter\n"
+" touch %s\n"
+"et r�essayer.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s : n'a pas pu lire le fichier � %s � : %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s : pg_control existe mais son CRC est invalide ; agir avec pr�caution\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s : pg_control existe mais est corrompu ou de version inconnue ; ignor�\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Valeurs de pg_control devin�es :\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Valeurs actuelles de pg_control :\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "Num�ro de version de pg_control : %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Num�ro de version du catalogue : %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Identifiant du syst�me de base de donn�es : %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "Dernier TimeLineID du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "Dernier full_page_writes du point de contr�le : %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "d�sactiv�"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "activ�"
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "Dernier NextXID du point de contr�le : %u:%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "Dernier NextOID du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "Dernier NextMultiXactId du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "Dernier NextMultiOffset du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "Dernier oldestXID du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "Dernier oldestXID du point de contr�le de la base : %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "Dernier oldestActiveXID du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "Dernier oldestMultiXID du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "Dernier oldestMulti du point de contr�le de la base : %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "Dernier oldestCommitTsXid du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "Dernier newestCommitTsXid du point de contr�le : %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Alignement maximal des donn�es : %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Taille du bloc de la base de donn�es : %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Blocs par segment des relations volumineuses : %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Taille de bloc du journal de transaction : %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Octets par segment du journal de transaction : %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Longueur maximale des identifiants : %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Nombre maximal de colonnes d'un index: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Longueur maximale d'un morceau TOAST : %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Taille d'un morceau de Large Object : %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Stockage du type date/heure : %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "entiers 64-bits"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "nombres � virgule flottante"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Passage d'argument float4 : %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "par r�f�rence"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "par valeur"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Passage d'argument float8 : %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Version des sommes de contr�le des pages de donn�es : %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Valeurs � changer :\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Premier segment du journal apr�s r�initialisation : %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "OldestMulti's DB: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "OldestXID's DB: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "NextXID Epoch: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr ""
+"%s : erreur interne -- sizeof(ControlFileData) est trop important...\n"
+"corrigez PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s : n'a pas pu cr�er le fichier pg_control : %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s : n'a pas pu �crire le fichier pg_control : %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s : erreur fsync : %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s : n'a pas pu ouvrir le r�pertoire � %s � : %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s : n'a pas pu lire le r�pertoire � %s � : %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s : n'a pas pu fermer le r�pertoire � %s � : %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s : n'a pas pu supprimer le fichier � %s � : %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s : n'a pas pu ouvrir le fichier � %s � : %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s : n'a pas pu �crire le fichier � %s � : %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s r�initialise le journal des transactions PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Usage :\n"
+" %s [OPTION]... R�P_DONN�ES\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Options :\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
+msgstr " -c XID,XID configure la plus ancienne et la plus r�cente transaction\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (z�ro dans l'une des deux valeurs signifie aucun changement)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] R�PDONNEES r�pertoire de la base de donn�es\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr ""
+" -e XIDEPOCH fixe la valeur epoch du prochain identifiant de\n"
+" transaction\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f force la mise � jour\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr ""
+" -l FICHIERXLOG force l'emplacement minimal de d�but des WAL du nouveau\n"
+" journal de transactions\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID fixe le prochain identifiant multi-transaction\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr ""
+" -n pas de mise � jour, affiche simplement ce qui sera fait\n"
+" (pour test)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID fixe le prochain OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O D�CALAGE fixe le d�calage de la prochaine multi-transaction\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version affiche la version et quitte\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID fixe le prochain identifiant de transaction\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help affiche cette aide et quitte\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Rapporter les bogues � .\n"
+
+#~ msgid "%s: invalid argument for option -x\n"
+#~ msgstr "%s : argument invalide pour l'option -x\n"
+
+#~ msgid "%s: invalid argument for option -o\n"
+#~ msgstr "%s : argument invalide pour l'option -o\n"
+
+#~ msgid "%s: invalid argument for option -m\n"
+#~ msgstr "%s : argument invalide pour l'option -m\n"
+
+#~ msgid "%s: invalid argument for option -O\n"
+#~ msgstr "%s : argument invalide pour l'option -O\n"
+
+#~ msgid "%s: invalid argument for option -l\n"
+#~ msgstr "%s : argument invalide pour l'option -l\n"
+
+#~ msgid "%s: could not read from directory \"%s\": %s\n"
+#~ msgstr "%s : n'a pas pu lire le r�pertoire � %s � : %s\n"
+
+#~ msgid " --help show this help, then exit\n"
+#~ msgstr " --help affiche cette aide et quitte\n"
+
+#~ msgid " --version output version information, then exit\n"
+#~ msgstr " --version afficherla version et quitte\n"
+
+#~ msgid "First log file ID after reset: %u\n"
+#~ msgstr "Premier identifiant du journal apr�s r�initialisation : %u\n"
diff --git a/src/bin/pg_resetwal/po/it.po b/src/bin/pg_resetwal/po/it.po
new file mode 100644
index 0000000000..9fc4479060
--- /dev/null
+++ b/src/bin/pg_resetwal/po/it.po
@@ -0,0 +1,620 @@
+#
+# Translation of pg_resetxlog to Italian
+# PostgreSQL Project
+#
+# Associazione Culturale ITPUG - Italian PostgreSQL Users Group
+# http://www.itpug.org/ - info@itpug.org
+#
+# Traduttori:
+# * Diego Cinelli
+# * Daniele Varrazzo
+#
+# Revisori:
+# * Emanuele Zamprogno
+#
+# Traduttori precedenti:
+# * Fabrizio Mazzoni
+# * Mirko Tebaldi
+#
+# Copyright (c) 2010, Associazione Culturale ITPUG
+# Distributed under the same license of the PostgreSQL project
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: pg_resetxlog (PostgreSQL) 9.6\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-04-17 00:14+0000\n"
+"PO-Revision-Date: 2016-04-17 20:51+0100\n"
+"Last-Translator: Daniele Varrazzo \n"
+"Language-Team: Gruppo traduzioni ITPUG \n"
+"Language: it\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Poedit-SourceCharset: utf-8\n"
+"Plural-Forms: nplurals=2; plural=n != 1;\n"
+"X-Generator: Poedit 1.5.4\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: ATTENZIONE: non è possibile creare token ristretti su questa piattaforma\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: apertura del token di processo fallita: codice di errore %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: allocazione dei SID fallita: codice di errore: %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: creazione del token ristretto fallita: codice di errore %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: avvio del processo fallito per il comando \"%s\": codice di errore %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: ri-esecuzione con token ristretto fallita: codice di errore %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: lettura del codice di uscita del sottoprocesso fallita: codice di errore %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: argomento non valido per l'opzione %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Prova \"%s --help\" per maggiori informazioni.\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: l'ID epoch della transazione (-e) non deve essere -1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: l'ID della transazione (-x) non deve essere 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s: l'ID di transazione (-c) dev'essere 0 or maggiore o uguale a 2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: l'OID (-o) non deve essere 0\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: l'ID della multitransazione (-m) non deve essere 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: l'ID multitransazione più vecchio (-m) non può essere 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: l'offset di una multitransazione (-O) non può essere -1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: troppi argomenti di riga di comando (il primo è \"%s\")\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: non è stata specificata una directory per i dati\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s non può essere eseguito da \"root\"\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "È obbligatorio eseguire %s come superutente di PostgreSQL.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: spostamento nella directory \"%s\" fallito: %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: errore nell'apertura del file \"%s\" per la lettura: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: il file di lock \"%s\" esiste\n"
+"Il server è in esecuzione? Se non lo è, cancella il file di lock e riprova.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Se questi parametri sembrano accettabili, utilizza -f per forzare un reset.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Il server database non è stato arrestato correttamente.\n"
+"Resettare il registro delle transazioni può causare una perdita di dati.\n"
+"Se vuoi continuare comunque, utilizza -f per forzare il reset.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Registro delle transazioni riavviato\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Se sei sicuro che il percorso della directory dei dati è corretto, esegui\n"
+" touch %s\n"
+"e riprova.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: lettura del file \"%s\" fallita: %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_control esiste ma ha un CRC non valido; procedere con cautela\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control esiste ma è inutilizzabile o è una versione sconosciuta; verrà ignorato\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Valori pg_control indovinati:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Valori pg_control attuali:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "Numero di versione di pg_control: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Numero di versione del catalogo: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Identificatore di sistema del database: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineId dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "full_page_writes dell'ultimo checkpoint: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "disattivato"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "attivato"
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "NextXID dell'ultimo checkpoint: %u.%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "DB dell'oldestXID dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXID dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "DB dell'oldestMulti dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "oldestCommitTsXid dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "newestCommitTsXid dell'ultimo checkpoint: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Massimo allineamento dei dati: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Dimensione blocco database: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Blocchi per ogni segmento grosse tabelle: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Dimensione blocco WAL: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Byte per segmento WAL: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Lunghezza massima degli identificatori: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Massimo numero di colonne in un indice: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Massima dimensione di un segmento TOAST: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Dimensione di un blocco large-object: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Memorizzazione per tipi data/ora: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "interi a 64 bit"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "numeri in virgola mobile"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Passaggio di argomenti Float4: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "per riferimento"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "per valore"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "passaggio di argomenti Float8: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Versione somma di controllo dati pagine: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Valori da cambiare:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Primo segmento di log dopo il reset: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "DB di OldestMulti: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "DB di OldestXID: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "Epoca del NextXID: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr "%s: errore interno -- sizeof(ControlFileData) è troppo grande ... correggere PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: creazione del file pg_control fallita: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: scrittura del file pg_control fallita: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: errore fsync: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: apertura della directory \"%s\" fallita: %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: lettura della directory \"%s\" fallita: %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: chiusura della directory \"%s\" fallita: %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: cancellazione del file \"%s\" fallita: %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: apertura del file \"%s\" fallita: %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: errore nella scrittura del file \"%s\": %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s riavvia il registro delle transazioni di PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Utilizzo:\n"
+" %s [OPZIONI]... DATADIR\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Opzioni:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
+msgstr " -c XID,XID imposta le transazione più vecchia e più nuova che portano l'ora di commit\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (zero in uno dei dei valori vuol dire nessun cambiamento)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DATADIR directory dei dati\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH imposta il prossimo ID epoch transazione\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f forza l'esecuzione dell'aggiornamento\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr " -l XLOGFILE forza la locazione di inizio WAL minima per il nuovo log transazioni\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID imposta gli ID multitransazione successivo e più vecchio\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr " -n nessuna modifica, mostra solo cosa sarebbe fatto (per prova)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID imposta il prossimo OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET imposta il prossimo offset multitransazione\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version mostra informazioni sulla versione ed esci\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID imposta il prossimo ID di transazione\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help mostra questo aiuto ed esci\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Puoi segnalare eventuali bug a .\n"
diff --git a/src/bin/pg_resetwal/po/ja.po b/src/bin/pg_resetwal/po/ja.po
new file mode 100644
index 0000000000..54a0f870f1
--- /dev/null
+++ b/src/bin/pg_resetwal/po/ja.po
@@ -0,0 +1,490 @@
+# translation of pg_resetxlog.
+# Shigehiro Honda , 2005.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9.0 beta 3\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2013-08-18 12:05+0900\n"
+"PO-Revision-Date: 2013-08-18 12:10+0900\n"
+"Last-Translator: HOTTA Michihide \n"
+"Language-Team: jpug-doc \n"
+"Language: ja\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: pg_resetxlog.c:133
+#, c-format
+msgid "%s: invalid argument for option -e\n"
+msgstr "%s: オプション -e の引数が無効です\n"
+
+#: pg_resetxlog.c:134 pg_resetxlog.c:149 pg_resetxlog.c:164 pg_resetxlog.c:179
+#: pg_resetxlog.c:187 pg_resetxlog.c:213 pg_resetxlog.c:227 pg_resetxlog.c:234
+#: pg_resetxlog.c:242
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "詳細は\"%s --help\"を実行してください\n"
+
+#: pg_resetxlog.c:139
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: トランザクションID エポック(-e)は -1 であってはなりません\n"
+
+#: pg_resetxlog.c:148
+#, c-format
+msgid "%s: invalid argument for option -x\n"
+msgstr "%s: オプション-xの引数が無効です\n"
+
+#: pg_resetxlog.c:154
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: トランザクションID(-x)は非0でなければなりません\n"
+
+#: pg_resetxlog.c:163
+#, c-format
+msgid "%s: invalid argument for option -o\n"
+msgstr "%s: オプション-oの引数が無効です\n"
+
+#: pg_resetxlog.c:169
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID(-o)は非0でなければなりません\n"
+
+#: pg_resetxlog.c:178 pg_resetxlog.c:186
+#, c-format
+msgid "%s: invalid argument for option -m\n"
+msgstr "%s: オプション-mの引数が無効です\n"
+
+#: pg_resetxlog.c:192
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: マルチトランザクションID(-m)は非0でなければなりません\n"
+
+#: pg_resetxlog.c:202
+#, c-format
+#| msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: 最も古いマルチトランザクションID(-m)は非0でなければなりません\n"
+
+#: pg_resetxlog.c:212
+#, c-format
+msgid "%s: invalid argument for option -O\n"
+msgstr "%s: オプション-Oの引数が無効です\n"
+
+#: pg_resetxlog.c:218
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: マルチトランザクションオフセット(-O)は-1ではいけません\n"
+
+#: pg_resetxlog.c:226
+#, c-format
+msgid "%s: invalid argument for option -l\n"
+msgstr "%s: オプション-lの引数が無効です\n"
+
+#: pg_resetxlog.c:241
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: データディレクトリが指定されていません\n"
+
+#: pg_resetxlog.c:255
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: \"root\"では実行できません\n"
+
+#: pg_resetxlog.c:257
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "PostgreSQLのスーパーユーザで%sを実行しなければなりません\n"
+
+#: pg_resetxlog.c:267
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: ディレクトリ\"%s\"に移動できませんでした: %s\n"
+
+#: pg_resetxlog.c:280 pg_resetxlog.c:414
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: 読み取り用のファイル\"%s\"をオープンできませんでした: %s\n"
+
+#: pg_resetxlog.c:287
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: ロックファイル\"%s\"があります\n"
+"サーバが稼動していませんか? 稼動していなければロックファイルを削除し再実行してください。\n"
+
+#: pg_resetxlog.c:362
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"この値が適切だと思われるのであれば、-fを使用して強制リセットしてください。\n"
+
+#: pg_resetxlog.c:374
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"データベースサーバが正しくシャットダウンされていませんでした。\n"
+"トランザクションログのリセットにはデータ損失の恐れがあります。\n"
+"とにかく処理したいのであれば、-fを使用して強制的にリセットしてください。\n"
+
+#: pg_resetxlog.c:388
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "トランザクションログをリセットします。\n"
+
+#: pg_resetxlog.c:417
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"確実にデータディレクトリのパスが正しければ、\n"
+" touch %s\n"
+"を実行し、再実行してください。\n"
+"\n"
+
+#: pg_resetxlog.c:430
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: ファイル\"%s\"を読み込めませんでした: %s\n"
+
+#: pg_resetxlog.c:453
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_controlがありましたが、CRCが無効でした。警告付きで続行します\n"
+
+#: pg_resetxlog.c:462
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_controlがありましたが、破損あるいは未知のバージョンでしたので無視します\n"
+
+#: pg_resetxlog.c:562
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"pg_controlの推測値:\n"
+"\n"
+
+#: pg_resetxlog.c:564
+#, c-format
+msgid ""
+"pg_control values:\n"
+"\n"
+msgstr ""
+"pg_controlの値:\n"
+"\n"
+
+#: pg_resetxlog.c:575
+#, c-format
+#| msgid "First log file segment after reset: %u\n"
+msgid "First log segment after reset: %s\n"
+msgstr "リセット後、最初のログセグメント: %s\n"
+
+#: pg_resetxlog.c:577
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "pg_controlバージョン番号: %u\n"
+
+#: pg_resetxlog.c:579
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "カタログバージョン番号: %u\n"
+
+#: pg_resetxlog.c:581
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "データベースシステム識別子: %s\n"
+
+#: pg_resetxlog.c:583
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "最終チェックポイントの時系列ID: %u\n"
+
+#: pg_resetxlog.c:585
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "最終チェックポイントのfull_page_writes %s\n"
+
+#: pg_resetxlog.c:586
+msgid "off"
+msgstr "オフ"
+
+#: pg_resetxlog.c:586
+msgid "on"
+msgstr "オン"
+
+#: pg_resetxlog.c:587
+#, c-format
+msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgstr "最終チェックポイントのNextXID: %u/%u\n"
+
+#: pg_resetxlog.c:590
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "最終チェックポイントのNextOID: %u\n"
+
+#: pg_resetxlog.c:592
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "最終チェックポイントのNextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:594
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "最終チェックポイントのNextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:596
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "最終チェックポイントのoldestXID: %u\n"
+
+#: pg_resetxlog.c:598
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "最終チェックポイントのoldestXIDのDB: %u\n"
+
+#: pg_resetxlog.c:600
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "最終チェックポイントのoldestActiveXID: %u\n"
+
+#: pg_resetxlog.c:602
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "最終チェックポイントのoldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:604
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "最終チェックポイントのoldestMulti'sのDB: %u\n"
+
+#: pg_resetxlog.c:606
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "最大のデータアライメント: %u\n"
+
+#: pg_resetxlog.c:609
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "データベースブロックサイズ: %u\n"
+
+#: pg_resetxlog.c:611
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "ラージリレーションセグメントのブロック数: %u\n"
+
+#: pg_resetxlog.c:613
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "WALブロックのサイズ: %u\n"
+
+#: pg_resetxlog.c:615
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "WALセグメント当たりのバイト数: %u\n"
+
+#: pg_resetxlog.c:617
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "識別子の最大長: %u\n"
+
+#: pg_resetxlog.c:619
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "インデックス内の最大列数: %u\n"
+
+#: pg_resetxlog.c:621
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "TOAST チャンク一個の最大サイズ: %u\n"
+
+#: pg_resetxlog.c:623
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "日付/時刻型の格納方式 %s\n"
+
+#: pg_resetxlog.c:624
+msgid "64-bit integers"
+msgstr "64ビット整数"
+
+#: pg_resetxlog.c:624
+msgid "floating-point numbers"
+msgstr "浮動小数点数"
+
+#: pg_resetxlog.c:625
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Float4 引数の渡し方: %s\n"
+
+#: pg_resetxlog.c:626 pg_resetxlog.c:628
+msgid "by reference"
+msgstr "参照渡し"
+
+#: pg_resetxlog.c:626 pg_resetxlog.c:628
+msgid "by value"
+msgstr "値渡し"
+
+#: pg_resetxlog.c:627
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Float8 引数の渡し方: %s\n"
+
+#: pg_resetxlog.c:629
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "データベージチェックサムのバージョン: %u\n"
+
+#: pg_resetxlog.c:692
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr ""
+"%s: 内部エラー -- sizeof(ControlFileData)が大きすぎます \n"
+"... PG_CONTROL_SIZE を修正してください\n"
+
+#: pg_resetxlog.c:707
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: pg_controlファイルを作成できませんでした: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: pg_controlファイルを書き込めませんでした: %s\n"
+
+#: pg_resetxlog.c:725 pg_resetxlog.c:1024
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: fsyncエラー: %s\n"
+
+#: pg_resetxlog.c:765 pg_resetxlog.c:836 pg_resetxlog.c:892
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: ディレクトリ\"%s\"をオープンできませんでした: %s\n"
+
+#: pg_resetxlog.c:807 pg_resetxlog.c:869 pg_resetxlog.c:926
+#, c-format
+msgid "%s: could not read from directory \"%s\": %s\n"
+msgstr "%s: ディレクトリ\"%s\"から読み込めませんでした: %s\n"
+
+#: pg_resetxlog.c:850 pg_resetxlog.c:907
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: ファイル\"%s\"を削除できませんでした: %s\n"
+
+#: pg_resetxlog.c:991
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: ファイル\"%s\"をオープンできませんでした: %s\n"
+
+#: pg_resetxlog.c:1002 pg_resetxlog.c:1016
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: ファイル\"%s\"を書き込めませんでした: %s\n"
+
+#: pg_resetxlog.c:1035
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%sはPostgreSQLのトランザクションログをリセットします。\n"
+"\n"
+
+#: pg_resetxlog.c:1036
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"使用方法:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+
+#: pg_resetxlog.c:1037
+#, c-format
+msgid "Options:\n"
+msgstr "オプション:\n"
+
+#: pg_resetxlog.c:1038
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH 次のトランザクションIDエポックを設定します\n"
+
+#: pg_resetxlog.c:1039
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f 強制的に更新を実施します\n"
+
+#: pg_resetxlog.c:1040
+#, c-format
+#| msgid " -l TLI,FILE,SEG force minimum WAL starting location for new transaction log\n"
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr " -l XLOGFILE 新しいトランザクションログの最小WAL開始ポイントを強制します\n\n"
+
+#: pg_resetxlog.c:1041
+#, c-format
+#| msgid " -m XID set next multitransaction ID\n"
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID 次の最も古いマルチトランザクションIDを設定します\n"
+
+#: pg_resetxlog.c:1042
+#, c-format
+msgid " -n no update, just show extracted control values (for testing)\n"
+msgstr " -n 更新をせず、単に取り出した制御値を表示します(試験用)\n"
+
+#: pg_resetxlog.c:1043
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID 次のOIDを設定します\n"
+
+#: pg_resetxlog.c:1044
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET 次のマルチトランザクションオフセットを設定します\n"
+
+#: pg_resetxlog.c:1045
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version バージョン情報を表示し、終了します\n"
+
+#: pg_resetxlog.c:1046
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID 次のトランザクションIDを設定します\n"
+
+#: pg_resetxlog.c:1047
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help このヘルプを表示し、終了します\n"
+
+#: pg_resetxlog.c:1048
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"不具合はまで報告してください。\n"
+
+#~ msgid " --version output version information, then exit\n"
+#~ msgstr " --version バージョン情報を表示し、終了します\n"
+
+#~ msgid " --help show this help, then exit\n"
+#~ msgstr " --help ヘルプを表示し、終了します\n"
+
+#~ msgid "First log file ID after reset: %u\n"
+#~ msgstr "リセット後、現在のログファイルID: %u\n"
diff --git a/src/bin/pg_resetwal/po/ko.po b/src/bin/pg_resetwal/po/ko.po
new file mode 100644
index 0000000000..47d83c3a97
--- /dev/null
+++ b/src/bin/pg_resetwal/po/ko.po
@@ -0,0 +1,616 @@
+# Korean message translation file for PostgreSQL pg_resetxlog
+# Ioseph Kim , 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9.5\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-01-27 10:03+0900\n"
+"PO-Revision-Date: 2016-01-29 13:45+0900\n"
+"Last-Translator: Ioseph Kim \n"
+"Language-Team: Korean Team \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Language: ko\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: 경고: 이 운영체제에서 restricted token을 만들 수 없음\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: 프로세스 토큰을 열 수 없음: 오류 코드 %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: SID를 할당할 수 없음: 오류 코드 %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: 상속된 토큰을 만들 수 없음: 오류 코드 %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: \"%s\" 명령용 프로세스를 시작할 수 없음: 오류 코드 %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: 상속된 토큰으로 재실행할 수 없음: 오류 코드 %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: 하위 프로세스의 종료 코드를 구할 수 없음: 오류 코드 %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: %s 옵션의 잘못된 인자\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "자세한 사용법은 \"%s --help\"\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: 트랜잭션 ID epoch (-e) 값은 -1이 아니여야함\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: 트랜잭션 ID (-x) 값은 0이 아니여야함\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid ""
+"%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr ""
+"%s: -c 옵션으로 지정한 트랜잭션 ID는 0이거나 2이상이어야 함\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) 값은 0이 아니여야함\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: 멀티트랜잭션 ID (-m) 값은 0이 아니여야함\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: 제일 오래된 멀티트랜잭션 ID (-m) 값은 0이 아니여야함\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: 멀티트랜잭션 옵셋 (-O) 값은 -1이 아니여야함\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: 너무 많은 명령행 인수를 지정했습니다. (처음 \"%s\")\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: 데이터 디렉터리를 지정하지 않았음\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: 이 프로그램은 \"root\"로 실행될 수 없음\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "PostgreSQL superuser로 %s 프로그램을 실행하십시오.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: \"%s\" 디렉터리로 바꿀 수 없음: %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: \"%s\" 파일을 읽기 모드로 열 수 없음: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: \"%s\" 잠금 파일이 있습니다.\n"
+"서버가 가동중인가요? 그렇지 않다면, 이 파일을 지우고 다시 시도하십시오.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"이 설정값들이 타당하다고 판단되면, 강제로 갱신하려면, -f 옵션을 쓰세요.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"이 데이터베이스 서버는 정상적으로 중지되지 못했습니다.\n"
+"트랜잭션 로그를 다시 설정하는 것은 자료 손실을 야기할 수 있습니다.\n"
+"그럼에도 불구하고 진행하려면, -f 옵션을 사용해서 강제 설정을 하십시오.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "트랜잭션 로그 재설정\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"지정한 데이터 디렉터리가 맞다면, 다음 명령을 실행하고, 다시 시도해\n"
+"보십시오.\n"
+" touch %s\n"
+"(win32에서 어떻게 하나?)\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: \"%s\" 파일을 읽을 수 없음: %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr ""
+"%s: pg_control파일이 있지만, CRC값이 잘못되었습니다; 경고과 함께 진행함\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control파일이 있지만, 손상되었거나 버전을 알 수 없음; 무시함\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"추측된 pg_control 설정값들:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"현재 pg_control 설정값들:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "pg_control 버전 번호: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "카탈로그 버전 번호: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "데이터베이스 시스템 식별자: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "마지막 체크포인트 TimeLineID: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "마지막 체크포인트 full_page_writes: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "off"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "on"
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgstr "마지막 체크포인트 NextXID: %u/%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "마지막 체크포인트 NextOID: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "마지막 체크포인트 NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "마지막 체크포인트 NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "마지막 체크포인트 제일 오래된 XID: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "마지막 체크포인트 제일 오래된 XID의 DB:%u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "마지막 체크포인트 제일 오래된 ActiveXID:%u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "마지막 체크포인트 제일 오래된 MultiXid:%u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "마지막 체크포인트 제일 오래된 MultiXid의 DB:%u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "마지막 체크포인트 제일 오래된 CommitTsXid:%u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "마지막 체크포인트 최신 CommitTsXid: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "최대 자료 정렬: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "데이터베이스 블록 크기: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "대형 릴레이션의 세그먼트당 블럭 갯수: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "WAL 블록 크기: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "WAL 세그먼트의 크기(byte): %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "식별자 최대 길이: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "인덱스에서 사용하는 최대 열 수: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "TOAST 청크의 최대 크기: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "대형객체 청크의 최대 크기: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "날짜/시간형 자료의 저장방식: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "64-비트 정수"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "부동소수"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Float4 인수 전달: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "참조별"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "값별"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Float8 인수 전달: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "데이터 페이지 체크섬 버전: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"변경될 값:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "리셋 뒤 첫 로그 세그먼트: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "OldestMultiXid의 DB: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "OldestXID의 DB: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "NextXID epoch: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "제일 오래된 CommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "최근 CommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid ""
+"%s: internal error -- sizeof(ControlFileData) is too large ... fix "
+"PG_CONTROL_SIZE\n"
+msgstr ""
+"%s: 내부 오류 -- sizeof(ControlFileData) 값이 너무 큼 ... PG_CONTROL_SIZE 고"
+"쳐야함\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: pg_control 파일 만들 수 없음: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: pg_control 파일 쓸 수 없음: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: fsync 오류: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: \"%s\" 디렉터리 열 수 없음: %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: \"%s\" 디렉터리를 읽을 수 없음: %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: \"%s\" 디렉터리를 닫을 수 없음: %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: \"%s\" 파일 삭제 할 수 없음: %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: \"%s\" 파일 열 수 없음: %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: \"%s\" 파일 쓸 수 없음: %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s 프로그램은 PostgreSQL 트랜잭션 로그를 다시 설정합니다.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"사용법:\n"
+" %s [옵션]... DATADIR\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "옵션들:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid ""
+" -c XID,XID set oldest and newest transactions bearing commit "
+"timestamp\n"
+msgstr ""
+" -c XID,XID 커밋 시간을 도출하는 제일 오래된, 최신의 트랜잭션 지정\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (0으로 지정하면 바꾸지 않음)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DATADIR 데이터 디렉터리\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH 다음 트랙잭션 ID epoch 지정\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f 강제로 갱신함\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid ""
+" -l XLOGFILE force minimum WAL starting location for new transaction "
+"log\n"
+msgstr ""
+" -l XLOGFILE 새 트랜잭션 로그를 위한 WAL 최소 시작 위치를 강제로 지정\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID 다음 제일 오래된 멀티트랜잭션 ID 지정\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid ""
+" -n no update, just show what would be done (for testing)\n"
+msgstr ""
+" -n 갱신하지 않음, 컨트롤 값들을 보여주기만 함(테스트용)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID 다음 OID 지정\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET 다음 멀티트랜잭션 옵셋 지정\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version 버전 정보 보여주고 마침\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID 다음 XID(트랜잭션 ID) 지정\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help 이 도움말을 보여주고 마침\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"오류보고: .\n"
diff --git a/src/bin/pg_resetwal/po/pl.po b/src/bin/pg_resetwal/po/pl.po
new file mode 100644
index 0000000000..d87214bc53
--- /dev/null
+++ b/src/bin/pg_resetwal/po/pl.po
@@ -0,0 +1,626 @@
+# Polish message translation file for pg_resetxlog
+# Copyright (C) 2011 PostgreSQL Global Development Group
+# This file is distributed under the same license as the PostgreSQL package.
+# Begina Felicysym , 2011, 2012, 2013.
+# grzegorz , 2014, 2015, 2016.
+msgid ""
+msgstr ""
+"Project-Id-Version: pg_resetxlog (PostgreSQL 9.1)\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-07-03 03:14+0000\n"
+"PO-Revision-Date: 2016-07-03 17:54+0200\n"
+"Last-Translator: grzegorz \n"
+"Language-Team: begina.felicysym@wp.eu\n"
+"Language: pl\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 "
+"|| n%100>=20) ? 1 : 2);\n"
+"X-Generator: Virtaal 0.7.1\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: OSTRZEŻENIE nie można tworzyć ograniczonych tokenów na tej platformie\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: nie można otworzyć tokenu procesu: kod błędu %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: nie udało się przydzielić SIDów: kod błędu %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: nie udało się utworzyć ograniczonego tokena: kod błędu %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: nie udało się uruchomić procesu dla polecenia \"%s\": kod błędu %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: nie udało się ponownie wykonać ograniczonego tokena: %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: nie udało uzyskać kodu wyjścia z usługi podrzędnej: kod błędu %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: niepoprawny argument dla opcji %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Spróbuj \"%s --help\" aby uzyskać więcej informacji.\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: epoka ID transakcji (-e) nie może być -1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: ID transakcji (-x) nie może być 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s: ID transakcji (-c) musi być albo 0 albo większa lub równa 2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) nie może być 0\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID multitransakcji (-m) nie może być 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: najstarszy ID multitransakcji (-m) nie może być 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: offset multitransakcji (-O) nie może być -1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: za duża ilość parametrów (pierwszy to \"%s\")\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: katalog danych nie został ustawiony\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: nie może być wykonywane pod \"rootem\"\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Musisz uruchomić %s jako superużytkownik PostgreSQL.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: nie można zmienić katalogu na \"%s\": %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: nie można otworzyć pliku \"%s\" do odczytu: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: plik blokady \"%s\" istnieje\n"
+"Czy serwer działa? Jeśli nie, usuń plik blokady i spróbuj ponownie.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Jeśli te wartości wydają się do przyjęcia, użyj -f by wymusić reset.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Serwer bazy danych nie został poprawnie zamknięty.\n"
+"Zresetowanie dziennika transakcji może spowodować utratę danych.\n"
+"Jeśli chcesz kontynuować, użyj -f, aby wymusić reset.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Reset dziennika transakcji\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Jeśli jesteś pewien, że ścieżka folder u jest poprawna, wykonaj\n"
+" touch %s\n"
+"i spróbuj ponownie.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: nie można odczytać z pliku \"%s\": %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_control istnieje ale ma niepoprawne CRC; postępuj ostrożnie\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control istnieje ale jest uszkodzony lub ma nieznaną wersję, zignorowano\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Odgadnięte wartości pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Bieżące wartości pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "pg_control w wersji numer: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Katalog w wersji numer: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Identyfikator systemu bazy danych: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineID najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "full_page_writes najnowszego punktu kontrolnego: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "wyłączone"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "włączone"
+
+#: pg_resetxlog.c:649
+#, c-format
+#| msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "NextXID najnowszego punktu kontrolnego: %u:%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "DB oldestXID'u najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "NextXID najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "DB oldestMulti'u najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "oldestCommitTsXid najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "newestCommitTsXid najnowszego punktu kontrolnego: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Maksymalne wyrównanie danych: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Wielkość bloku bazy danych: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Bloki na segment są w relacji: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Wielkość bloku WAL: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Bajtów na segment WAL: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Maksymalna długość identyfikatorów: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Maksymalna liczba kolumn w indeksie: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Maksymalny rozmiar fragmentu TOAST: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Rozmiar fragmentu dużego obiektu: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Typ przechowywania daty/czasu: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "64-bit'owe zmienne integer"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "liczby zmiennoprzecinkowe"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Przekazywanie parametru float4: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "przez referencję"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "przez wartość"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Przekazywanie parametru float8: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Suma kontrolna strony danych w wersji numer: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Wartości do zmiany:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Pierwszy segment dziennika po resecie: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "DB OldestMulti'u: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "DB OldestXIDu: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "Epoka NextXID: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr "%s: błąd wewnętrzny -- sizeof(ControlFileData) jest zbyt duża ... popraw PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: nie można utworzyć pliku pg_control: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: nie można pisać do pliku pg_control: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: błąd fsync: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: nie można otworzyć katalogu \"%s\": %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: nie można odczytać katalogu \"%s\": %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: nie można zamknąć katalogu \"%s\": %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: nie można usunąć pliku \"%s\": %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: nie można otworzyć pliku \"%s\": %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: nie można zapisać pliku \"%s\": %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s resetuje log transakcji PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Sposób użycia:\n"
+" %s [OPCJA]... FOLDERDANYCH\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Opcje:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
+msgstr " -c XID,XID ustawia najstarszy i najświeższy znacznik czasu wykonywanego zatwierdzenia\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (zero w obu wartościach oznacza brak zmian)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DATADIR folder bazy danych\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH ustawia epokę ID następnej transakcji\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f wymusza wykonanie modyfikacji\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr " -l XLOGFILE wymusza minimalne położenie początkowe WAL dla nowego komunikatu transakcji\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m XID,MXID ustawia ID następnej i najstarszej multitransakcji\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr " -n bez modyfikacji, po prostu wyświetl co będzie zrobione (do testowania)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID ustawia następny OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET ustawia następny offset multitransakcji\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version wypisuje informacje o wersji i kończy\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID ustawia ID następnej transakcji\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help pokazuje ten ekran pomocy i kończy\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Błędy proszę przesyłać na adres .\n"
+
+#~ msgid "%s: invalid argument for option -x\n"
+#~ msgstr "%s: niepoprawny argument dla opcji -x\n"
+
+#~ msgid "%s: invalid argument for option -o\n"
+#~ msgstr "%s: niepoprawny argument dla opcji -o\n"
+
+#~ msgid "%s: invalid argument for option -m\n"
+#~ msgstr "%s: niepoprawny argument dla opcji -m\n"
+
+#~ msgid "%s: invalid argument for option -O\n"
+#~ msgstr "%s: niepoprawny argument dla opcji -O\n"
+
+#~ msgid "%s: invalid argument for option -l\n"
+#~ msgstr "%s: niepoprawny argument dla opcji -l\n"
+
+#~ msgid "%s: could not read from directory \"%s\": %s\n"
+#~ msgstr "%s: nie można odczytać katalogu \"%s\": %s\n"
+
+#~ msgid "First log file ID after reset: %u\n"
+#~ msgstr "Pierwszy plik dziennika po resecie: %u\n"
diff --git a/src/bin/pg_resetwal/po/pt_BR.po b/src/bin/pg_resetwal/po/pt_BR.po
new file mode 100644
index 0000000000..ca6e6d4169
--- /dev/null
+++ b/src/bin/pg_resetwal/po/pt_BR.po
@@ -0,0 +1,603 @@
+# Brazilian Portuguese message translation file for pg_resetxlog
+# Copyright (C) 2009 PostgreSQL Global Development Group
+# This file is distributed under the same license as the PostgreSQL package.
+# Cesar Suga , 2002.
+# Roberto Mello , 2002.
+# Euler Taveira de Oliveira , 2003-2016.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9.5\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-02-13 17:22-0300\n"
+"PO-Revision-Date: 2005-10-04 22:55-0300\n"
+"Last-Translator: Euler Taveira de Oliveira \n"
+"Language-Team: Brazilian Portuguese \n"
+"Language: pt_BR\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: AVISO: não pode criar informações restritas nessa plataforma\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: não pôde abrir informação sobre processo: código de erro %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: não pôde alocar SIDs: código de erro %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: não pôde criar informação restrita: código de erro %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: não pôde iniciar processo para comando \"%s\": código de erro %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: não pôde executar novamente com informação restrita: código de erro %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: não pôde obter código de saída de subprocesso: código de erro %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: argumento inválido para opção %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Tente \"%s --help\" para obter informações adicionais.\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: época do ID da transação (-e) não deve ser -1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: ID da transação (-x) não deve ser 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s: ID de transação (-c) deve ser 0 ou maior ou igual a 2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) não deve ser 0\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID de transação múltipla (-m) não deve ser 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID de transação múltipla mais velho (-m) não deve ser 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: deslocamento da transação múltipla (-O) não deve ser -1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: muitos argumentos de linha de comando (primeiro é \"%s\")\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: nenhum diretório de dados foi especificado\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: não pode ser executado pelo \"root\"\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Você deve executar %s como um super-usuário do PostgreSQL.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: não pôde mudar diretório para \"%s\": %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: não pôde abrir arquivo \"%s\" para leitura: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: arquivo de bloqueio \"%s\" existe\n"
+"O servidor está executando? Se não, apague o arquivo de bloqueio e tente novamente.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Se estes valores lhe parecem aceitáveis, use -f para forçar o reinício.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"O servidor de banco de dados não foi desligado corretamente.\n"
+"Reiniciar o log de transação pode causar perda de dados.\n"
+"Se você quer continuar mesmo assim, use -f para forçar o reinício.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Log de transação reiniciado\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Se você tem certeza que o caminho do diretório de dados está correto, execute\n"
+" touch %s\n"
+"e tente novamente.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: não pôde ler arquivo \"%s\": %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_control existe mas tem CRC inválido: prossiga com cuidado\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control existe mas não funciona ou sua versão é desconhecida; ignorando-o\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Valores supostos do pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Valores atuais do pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "número da versão do pg_control: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Número da versão do catálogo: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Identificador do sistema de banco de dados: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineID do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "full_page_writes do último ponto de controle: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "desabilitado"
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "habilitado"
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgstr "NextXID do último ponto de controle: %u/%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "BD do oldestXID do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "BD do oldestMulti do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "oldestCommitTsXid do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "newestCommitTsXid do último ponto de controle: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Máximo alinhamento de dado: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Tamanho do bloco do banco de dados: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Blocos por segmento da relação grande: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Tamanho do bloco do WAL: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Bytes por segmento do WAL: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Tamanho máximo de identificadores: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Máximo de colunas em um índice: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Tamanho máximo do bloco TOAST: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Tamanho do bloco de um objeto grande: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Tipo de data/hora do repositório: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "inteiros de 64 bits"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "números de ponto flutuante"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Passagem de argumento float4: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "por referência"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "por valor"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Passagem de argumento float8: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Versão da verificação de páginas de dados: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Valores a serem alterados:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Primeiro segmento do arquivo de log após reinício: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "BD do OldestMulti: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "BD do OldestXID: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "época do NextXID: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr "%s: erro interno -- sizeof(ControlFileData) é muito grande ... conserte o PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: não pôde criar arquivo do pg_control: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: não pôde escrever no arquivo do pg_control: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: erro ao executar fsync: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: não pôde abrir diretório \"%s\": %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: não pôde ler diretório \"%s\": %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: não pôde fechar diretório \"%s\": %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: não pôde apagar arquivo \"%s\": %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: não pôde abrir arquivo \"%s\": %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: não pôde escrever no arquivo \"%s\": %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s reinicia o log de transação do PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Uso:\n"
+" %s [OPÇÃO]... DIRDADOS\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Opções:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
+msgstr " -c XID,XID define transações mais velha e mais nova contendo timestamp de efetivação\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (zero em qualquer valor significa nenhuma mudança)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DIRDADOS diretório de dados\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e ÉPOCA_XID define próxima época do ID de transação\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f força atualização ser feita\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr " -l XLOGFILE força local inicial mínimo do WAL para novo log de transação\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID define próximo e mais velho ID de transação múltipla\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr " -n sem atualização, mostra o que seria feito (para teste)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID define próximo OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET define próxima posição de transação múltipla\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version mostra informação sobre a versão e termina\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID define próximo ID de transação\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help mostra essa ajuda e termina\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Relate erros a .\n"
diff --git a/src/bin/pg_resetwal/po/ru.po b/src/bin/pg_resetwal/po/ru.po
new file mode 100644
index 0000000000..5a8e12e723
--- /dev/null
+++ b/src/bin/pg_resetwal/po/ru.po
@@ -0,0 +1,677 @@
+# PG_RESETXLOG Translated Messages into the Russian Language (UTF-8)
+#
+# Copyright (c) 2002-2005 Serguei A. Mokhov, mokhov@cs.concordia.ca
+# Copyright (c) 2012-2013 Alexander Lakhin, exclusion@gmail.com
+# Distributed under the same licensing terms as PostgreSQL itself.
+#
+# pgtranslation Id: pg_resetxlog.po,v 1.3 2009/10/14 21:08:40 petere Exp $
+#
+# translation of subject-specific terminology, see:
+# перевод некоторых специфичных терминов:
+# http://wiki.postgresql.org/wiki/NLS/ru/dict
+#
+# ChangeLog:
+# - August 24, 2014: Updates for 9.4. Alexander Lakhin <exclusion@gmail.com>.
+# - With corrections from Dmitriy Olshevskiy
+# - March 14, 2013: Updates for 9.3. Alexander Lakhin <exclusion@gmail.com>.
+# - June 27, 2012: Updates for 9.2. Alexander Lakhin <exclusion@gmail.com>.
+# - April 2, 2012: Bug fixes. Alexander Lakhin <exclusion@gmail.com>.
+# - February 18, 2012: Complete translation for 9.1. Alexander Lakhin <exclusion@gmail.com>.
+# - March 27, 2009: complete translation for 8.3, Sergey Burladyan .
+# - January 17, 2005: Complete translation for 8.0, Serguei A. Mokhov <mokhov@cs.concordia.ca>.
+# - December, 2004: Corrections and improvements by Oleg Bartunov .
+# - April 6 - August 11, 2004: Updates for 8.0; .
+# - July 24 - August 23, 2003: Updates for 7.4.*; .
+# - October 12, 2002: Complete post-7.3beta2 Translation, Serguei A. Mokhov <mokhov@cs.concordia.ca>.
+# - September 7, 2002: Complete post-7.3beta1 Translation, Serguei A. Mokhov <mokhov@cs.concordia.ca>.
+# - August 31, 2002: Initial Translation, Serguei A. Mokhov <mokhov@cs.concordia.ca>.
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9 current\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-05-27 16:14+0000\n"
+"PO-Revision-Date: 2015-12-31 14:56+0300\n"
+"Last-Translator: Alexander Lakhin \n"
+"Language-Team: Russian \n"
+"Language: ru\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n"
+"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
+"X-Generator: Lokalize 2.0\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: ПРЕДУПРЕЖДЕНИЕ: в этой ОС нельзя создавать ограниченные маркеры\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s: не удалось открыть маркер процесса (код ошибки: %lu)\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: не удалось подготовить структуры SID (код ошибки: %lu)\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: не удалось создать ограниченный маркер (код ошибки: %lu)\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr ""
+"%s: не удалось запустить процесс для команды \"%s\" (код ошибки: %lu)\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr ""
+"%s: не удалось перезапуститься с ограниченным маркером (код ошибки: %lu)\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: не удалось получить код выхода от подпроцесса (код ошибки: %lu)\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s: недопустимый аргумент параметра %s\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Для дополнительной информации попробуйте \"%s --help\".\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: эпоха ID транзакции (-e) не должна быть равна -1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: ID транзакции (-x) не должен быть равен 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid ""
+"%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s: ID транзакции (-c) должен быть равен 0, либо больше или равен 2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) не должен быть равен 0\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID мультитранзакции (-m) не должен быть равен 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: ID старейшей мультитранзакции (-m) не должен быть равен 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: смещение мультитранзакции (-O) не должно быть равно -1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: слишком много аргументов командной строки (первый: \"%s\")\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: каталог данных не указан\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: программу не должен запускать root\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Запускать %s нужно от имени суперпользователя PostgreSQL.\n"
+
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: не удалось перейти в каталог \"%s\": %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: не удалось открыть файл \"%s\" для чтения: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: обнаружен файл блокировки \"%s\"\n"
+"Возможно, сервер запущен? Если нет, удалите этот файл и попробуйте снова.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Если эти значения приемлемы, выполните сброс принудительно, добавив ключ -"
+"f.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Сервер баз данных был остановлен некорректно.\n"
+"Сброс журнала транзакций может привести к потере данных.\n"
+"Если вы хотите сбросить его, несмотря на это, добавьте ключ -f.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Журнал транзакций сброшен\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Если вы уверены, что путь к каталогу данных правильный, выполните\n"
+" touch %s\n"
+"и повторите попытку.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: не удалось прочитать файл \"%s\": %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr ""
+"%s: pg_control существует, но его контрольная сумма неверна; продолжайте с "
+"осторожностью\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr ""
+"%s: pg_control испорчен или имеет неизвестную версию; игнорируется...\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Предполагаемые значения pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Текущие значения pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "Номер версии pg_control: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Номер версии каталога: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Идентификатор системы баз данных: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "Линия времени последней конт. точки: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "Режим full_page_writes последней к.т: %s\n"
+
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "выкл."
+
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "вкл."
+
+#: pg_resetxlog.c:649
+#, c-format
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "NextXID последней конт. точки: %u:%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID последней конт. точки: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId послед. конт. точки: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset послед. конт. точки: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID последней конт. точки: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "БД с oldestXID последней конт. точки: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID последней к.т.: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid последней конт. точки: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "БД с oldestMulti последней к.т.: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "oldestCommitTsXid последней к. т.: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "newestCommitTsXid последней к. т.: %u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Макс. предел выравнивания данных: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Размер блока БД: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Блоков в макс. сегменте отношений: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Размер блока WAL: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Байт в сегменте WAL: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Максимальная длина идентификаторов: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Максимальное число столбцов в индексе: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Максимальный размер порции TOAST: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Размер порции большого объекта: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Формат хранения даты/времени: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "64-битные целые"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "числа с плавающей точкой"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Передача аргумента Float4: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "по ссылке"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "по значению"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Передача аргумента Float8: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Версия контрольных сумм страниц: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Значения, которые будут изменены:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Первый сегмент журнала после сброса: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "БД с oldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "БД с oldestXID: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "Эпоха NextXID: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid ""
+"%s: internal error -- sizeof(ControlFileData) is too large ... fix "
+"PG_CONTROL_SIZE\n"
+msgstr ""
+"%s: внутренняя ошибка -- размер ControlFileData слишком велик -- исправьте "
+"PG_CONTROL_SIZE\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: не удалось создать файл pg_control: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: не удалось записать файл pg_control: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: ошибка синхронизации с ФС: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: не удалось открыть каталог \"%s\": %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: не удалось прочитать каталог \"%s\": %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: не удалось закрыть каталог \"%s\": %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: ошибка при удалении файла \"%s\": %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: не удалось открыть файл \"%s\": %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: не удалось записать файл \"%s\": %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s сбрасывает журнал транзакций PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Usage:\n"
+" %s [ПАРАМЕТР]... КАТ_ДАННЫХ\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "Параметры:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid ""
+" -c XID,XID set oldest and newest transactions bearing commit "
+"timestamp\n"
+msgstr ""
+" -c XID,XID задать старейшую и новейшую транзакции, несущие метку "
+"времени фиксации\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (0 в любом из аргументов игнорируется)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] КАТ_ДАННЫХ каталог данных\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH задать эпоху в ID следующей транзакции\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f принудительное выполнение операции\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid ""
+" -l XLOGFILE force minimum WAL starting location for new transaction "
+"log\n"
+msgstr ""
+" -l XLOGFILE задать минимальное начальное положение WAL для нового\n"
+" журнала транзакций\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID задать ID следующей и старейшей мультитранзакции\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid ""
+" -n no update, just show what would be done (for testing)\n"
+msgstr ""
+" -n показать, какие действия будут выполнены, но не выполнять "
+"их\n"
+" (для проверки)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID задать следующий OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O СМЕЩЕНИЕ задать смещение следующей мультитранзакции\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version показать версию и выйти\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID задать ID следующей транзакции\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help показать эту справку и выйти\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Об ошибках сообщайте по адресу .\n"
+
+#~ msgid "%s: invalid argument for option -x\n"
+#~ msgstr "%s: недопустимый аргумент параметра -x\n"
+
+#~ msgid "%s: invalid argument for option -o\n"
+#~ msgstr "%s: недопустимый аргумент параметра -o\n"
+
+#~ msgid "%s: invalid argument for option -m\n"
+#~ msgstr "%s: недопустимый аргумент параметра -m\n"
+
+#~ msgid "%s: invalid argument for option -O\n"
+#~ msgstr "%s: недопустимый аргумент параметра -O\n"
+
+#~ msgid "%s: invalid argument for option -l\n"
+#~ msgstr "%s: недопустимый аргумент параметра -l\n"
+
+#~ msgid " -m XID,OLDEST set next multitransaction ID and oldest value\n"
+#~ msgstr ""
+#~ " -m XID,СТАРЕЙШАЯ задать ID следующей мультитранзакции и ID старейшей\n"
+
+#~ msgid "disabled"
+#~ msgstr "отключен"
+
+#~ msgid "enabled"
+#~ msgstr "включен"
+
+#~ msgid "First log file ID after reset: %u\n"
+#~ msgstr "ID первого журнала после сброса: %u\n"
diff --git a/src/bin/pg_resetwal/po/sv.po b/src/bin/pg_resetwal/po/sv.po
new file mode 100644
index 0000000000..528531be56
--- /dev/null
+++ b/src/bin/pg_resetwal/po/sv.po
@@ -0,0 +1,550 @@
+# Swedish message translation file for resetxlog.
+# Dennis Björklund , 2002, 2003, 2004, 2005, 2006.
+# Peter Eisentraut , 2010.
+# Mats Erik Andersson , 2014.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PostgreSQL 9.4\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2014-11-25 16:12+0000\n"
+"PO-Revision-Date: 2014-11-29 18:32+0100\n"
+"Last-Translator: Mats Erik Andersson \n"
+"Language-Team: Swedish \n"
+"Language: sv\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: pg_resetxlog.c:130
+#, c-format
+msgid "%s: invalid argument for option -e\n"
+msgstr "%s: Ogiltigt argument för växel -e.\n"
+
+#: pg_resetxlog.c:131 pg_resetxlog.c:146 pg_resetxlog.c:161 pg_resetxlog.c:176
+#: pg_resetxlog.c:184 pg_resetxlog.c:210 pg_resetxlog.c:224 pg_resetxlog.c:231
+#: pg_resetxlog.c:239
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "Försök med \"%s --help\" för mer information.\n"
+
+#: pg_resetxlog.c:136
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: Epok (-e) för transaktions-ID får inte vara -1.\n"
+
+#: pg_resetxlog.c:145
+#, c-format
+msgid "%s: invalid argument for option -x\n"
+msgstr "%s: Ogiltigt argument för växel -x.\n"
+
+#: pg_resetxlog.c:151
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: Transaktions-ID (-x) får inte vara 0.\n"
+
+#: pg_resetxlog.c:160
+#, c-format
+msgid "%s: invalid argument for option -o\n"
+msgstr "%s: Ogiltigt argument för växel -o.\n"
+
+#: pg_resetxlog.c:166
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) får inte vara 0.\n"
+
+#: pg_resetxlog.c:175 pg_resetxlog.c:183
+#, c-format
+msgid "%s: invalid argument for option -m\n"
+msgstr "%s: Ogiltigt argument för växel -m.\n"
+
+#: pg_resetxlog.c:189
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: Multitransaktions-ID (-m) får inte vara 0.\n"
+
+#: pg_resetxlog.c:199
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: Äldsta multitransaktions-ID (-m) får inte vara 0.\n"
+
+#: pg_resetxlog.c:209
+#, c-format
+msgid "%s: invalid argument for option -O\n"
+msgstr "%s: Ogiltigt argument för växel -O.\n"
+
+#: pg_resetxlog.c:215
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: Multitransaktionsoffset (-O) får inte vara -1.\n"
+
+#: pg_resetxlog.c:223
+#, c-format
+msgid "%s: invalid argument for option -l\n"
+msgstr "%s: Ogiltigt argument för växel -l.\n"
+
+#: pg_resetxlog.c:238
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: Ingen datakatalog angiven.\n"
+
+#: pg_resetxlog.c:252
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s: Får inte utföras av \"root\".\n"
+
+#: pg_resetxlog.c:254
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "Du måste köra %s som PostgreSQL:s superanvändare.\n"
+
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: Kunde inte gå till katalog \"%s\": %s\n"
+
+#: pg_resetxlog.c:277 pg_resetxlog.c:418
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: Kunde inte öppna filen \"%s\" för läsning: %s\n"
+
+#: pg_resetxlog.c:284
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: En låsfil \"%s\" finns på plats.\n"
+"Kör servern redan? Om inte, radera låsfilen och försök igen.\n"
+
+#: pg_resetxlog.c:366
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"Om dessa värden verkar godtagbara, använd då -f för att\n"
+"framtvinga återställning.\n"
+
+#: pg_resetxlog.c:378
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"Databasservern stängdes inte av ordentligt. Att återställa\n"
+"transaktionsloggen kan medföra att data förloras. Om du ändå\n"
+"vill fortsätta, använd -f för att framtvinga återställning.\n"
+
+#: pg_resetxlog.c:392
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "Återställning av transaktionslogg.\n"
+
+#: pg_resetxlog.c:421
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"Om du är säker på att sökvägen till datakatalogen är riktig,\n"
+"utför då \"touch %s\" och försök sedan igen.\n"
+
+#: pg_resetxlog.c:434
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: Kunde inte läsa fil \"%s\": %s\n"
+
+#: pg_resetxlog.c:457
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_control existerar men har ogiltig CRC. Fortsätt med varsamhet.\n"
+
+#: pg_resetxlog.c:466
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control existerar men är trasig eller har okänd version. Den ignoreras.\n"
+
+#: pg_resetxlog.c:568
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"Gissade värden för pg_control:\n"
+"\n"
+
+#: pg_resetxlog.c:570
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"Nuvarande värden för pg_control:\n"
+"\n"
+
+# November 26th, 2014: Insert six additional space characters
+# for best alignment with Swedish translation.
+# Translations should be checked against those of pg_controldata.
+#: pg_resetxlog.c:579
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "Versionsnummer för pg_control: %u\n"
+
+#: pg_resetxlog.c:581
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Katalogversion: %u\n"
+
+#: pg_resetxlog.c:583
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "Databasens systemidentifierare: %s\n"
+
+#: pg_resetxlog.c:585
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "TimeLineID vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:587
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "Senaste kontrollpunktens full_page_writes: %s\n"
+
+#: pg_resetxlog.c:588
+msgid "off"
+msgstr "av"
+
+#: pg_resetxlog.c:588
+msgid "on"
+msgstr "på"
+
+#: pg_resetxlog.c:589
+#, c-format
+msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgstr "NextXID vid senaste kontrollpunkt: %u/%u\n"
+
+#: pg_resetxlog.c:592
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "NextOID vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:594
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "NextMultiXactId vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:596
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "NextMultiOffset vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:598
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "oldestXID vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:600
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "DB för oldestXID vid senaste kontrollpunkt: %u\n"
+
+# FIXME: too wide
+#: pg_resetxlog.c:602
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "oldestActiveXID vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:604
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "oldestMultiXid vid senaste kontrollpunkt: %u\n"
+
+#: pg_resetxlog.c:606
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "DB för oldestMulti vid senaste kontrollpkt: %u\n"
+
+#: pg_resetxlog.c:608
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "Maximal jämkning av data (alignment): %u\n"
+
+#: pg_resetxlog.c:611
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "Databasens blockstorlek: %u\n"
+
+#: pg_resetxlog.c:613
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "Block per segment i en stor relation: %u\n"
+
+#: pg_resetxlog.c:615
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "Blockstorlek i transaktionsloggen: %u\n"
+
+#: pg_resetxlog.c:617
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "Segmentstorlek i transaktionsloggen: %u\n"
+
+#: pg_resetxlog.c:619
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "Maximal längd för identifierare: %u\n"
+
+#: pg_resetxlog.c:621
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "Maximalt antal kolonner i ett index: %u\n"
+
+#: pg_resetxlog.c:623
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "Maximal storlek för en TOAST-enhet: %u\n"
+
+#: pg_resetxlog.c:625
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "Storlek för large-object-enheter: %u\n"
+
+#: pg_resetxlog.c:627
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "Representation av dag och tid: %s\n"
+
+#: pg_resetxlog.c:628
+msgid "64-bit integers"
+msgstr "64-bitars heltal"
+
+#: pg_resetxlog.c:628
+msgid "floating-point numbers"
+msgstr "flyttal"
+
+#: pg_resetxlog.c:629
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "Åtkomst till float4-argument: %s\n"
+
+#: pg_resetxlog.c:630 pg_resetxlog.c:632
+msgid "by reference"
+msgstr "referens"
+
+#: pg_resetxlog.c:630 pg_resetxlog.c:632
+msgid "by value"
+msgstr "värdeåtkomst"
+
+#: pg_resetxlog.c:631
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "Åtkomst till float8-argument: %s\n"
+
+#: pg_resetxlog.c:633
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "Checksummaversion för datasidor: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"Värden att förändra:\n"
+"\n"
+
+# November 26th, 2014: Insert additional spacing to fit
+# with the first translated text, which uses most characters.
+#: pg_resetxlog.c:650
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "Första loggsegment efter återställning: %s\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "DB för OldestMulti: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:676
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:678
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:680
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "DB för OldestXID: %u\n"
+
+#: pg_resetxlog.c:686
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "Epok för NextXID: %u\n"
+
+
+#: pg_resetxlog.c:751
+#, c-format
+msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
+msgstr "%s: Internt fel: sizeof(ControlFileData) är alltför stor. Rätta till PG_CONTROL_SIZE.\n"
+
+#: pg_resetxlog.c:766
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: Kunde inte skapa fil för pg_control: %s\n"
+
+#: pg_resetxlog.c:777
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: Kunde inte skriva fil för pg_control: %s\n"
+
+#: pg_resetxlog.c:784 pg_resetxlog.c:1068
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: Misslyckad fsync: %s\n"
+
+#: pg_resetxlog.c:824 pg_resetxlog.c:890 pg_resetxlog.c:941
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: Kunde inte öppna filkatalog \"%s\": %s\n"
+
+#: pg_resetxlog.c:855 pg_resetxlog.c:912 pg_resetxlog.c:964
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: Kunde inte läsa filkatalog \"%s\": %s\n"
+
+#: pg_resetxlog.c:862 pg_resetxlog.c:919 pg_resetxlog.c:971
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: Kunde inte stänga filkatalog \"%s\": %s\n"
+
+#: pg_resetxlog.c:903 pg_resetxlog.c:955
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: Kunde inte radera fil \"%s\": %s\n"
+
+#: pg_resetxlog.c:1035
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: Kunde inte öppna fil \"%s\": %s\n"
+
+#: pg_resetxlog.c:1046 pg_resetxlog.c:1060
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: Kunde inte skriva fil \"%s\": %s\n"
+
+#: pg_resetxlog.c:1079
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s återställer transaktionslogg för PostgreSQL.\n"
+"\n"
+
+#: pg_resetxlog.c:1080
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"Användning:\n"
+" %s [FLAGGA]... DATAKATALOG\n"
+"\n"
+
+#: pg_resetxlog.c:1081
+#, c-format
+msgid "Options:\n"
+msgstr "Programväxlar:\n"
+
+#: pg_resetxlog.c:1082
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH sätter epok för nästa transaktions-ID\n"
+
+#: pg_resetxlog.c:1083
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f framtvinga återställning\n"
+
+#: pg_resetxlog.c:1084
+#, c-format
+msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
+msgstr " -l XLOGFIL ny transaktionslogg måste vara detta namn eller ett senare\n"
+
+#: pg_resetxlog.c:1085
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID sätt nästa och äldsta multitransaktions-ID\n"
+
+#: pg_resetxlog.c:1086
+#, c-format
+msgid " -n no update, just show what would be done (for testing)\n"
+msgstr " -n ingen updatering; visa planerade åtgärder (för testning)\n"
+
+#: pg_resetxlog.c:1087
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID sätt nästa OID\n"
+
+#: pg_resetxlog.c:1088
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET sätt nästa multitransaktionsoffset\n"
+
+#: pg_resetxlog.c:1089
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version visa versionsinformation, avsluta sedan\n"
+
+#: pg_resetxlog.c:1090
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID sätt nästa transaktions-ID\n"
+
+#: pg_resetxlog.c:1091
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help visa denna hjälp, avsluta sedan\n"
+
+#: pg_resetxlog.c:1092
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"Reportera fel till .\n"
diff --git a/src/bin/pg_resetwal/po/zh_CN.po b/src/bin/pg_resetwal/po/zh_CN.po
new file mode 100644
index 0000000000..c199ac9a1a
--- /dev/null
+++ b/src/bin/pg_resetwal/po/zh_CN.po
@@ -0,0 +1,661 @@
+# simplified Chinese translation file for pg_resetxlog and friends
+# Bao Wei , 2002.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: pg_resetxlog (PostgreSQL 9.0)\n"
+"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
+"POT-Creation-Date: 2016-04-18 04:44+0000\n"
+"PO-Revision-Date: 2016-05-19 20:41+0800\n"
+"Last-Translator: Yuwei Peng \n"
+"Language-Team: Chinese (Simplified) \n"
+"Language: zh_CN\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: Poedit 1.5.7\n"
+
+#: ../../common/restricted_token.c:68
+#, c-format
+msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
+msgstr "%s: WARNING: 无法为该平台创建受限制的令牌\n"
+
+#: ../../common/restricted_token.c:77
+#, c-format
+msgid "%s: could not open process token: error code %lu\n"
+msgstr "%s:无法打开进程令牌 (token): 错误码 %lu\n"
+
+#: ../../common/restricted_token.c:90
+#, c-format
+msgid "%s: could not allocate SIDs: error code %lu\n"
+msgstr "%s: 无法分配SID: 错误码 %lu\n"
+
+#: ../../common/restricted_token.c:110
+#, c-format
+msgid "%s: could not create restricted token: error code %lu\n"
+msgstr "%s: 无法创建受限令牌: 错误码为 %lu\n"
+
+#: ../../common/restricted_token.c:132
+#, c-format
+msgid "%s: could not start process for command \"%s\": error code %lu\n"
+msgstr "%s: 无法为命令 \"%s\"创建进程: 错误码 %lu\n"
+
+#: ../../common/restricted_token.c:170
+#, c-format
+msgid "%s: could not re-execute with restricted token: error code %lu\n"
+msgstr "%s: 无法使用受限令牌再次执行: 错误码 %lu\n"
+
+#: ../../common/restricted_token.c:186
+#, c-format
+msgid "%s: could not get exit code from subprocess: error code %lu\n"
+msgstr "%s: 无法从子进程得到退出码: 错误码 %lu\n"
+
+#. translator: the second %s is a command line argument (-e, etc)
+#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
+#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
+#: pg_resetxlog.c:264
+#, c-format
+msgid "%s: invalid argument for option %s\n"
+msgstr "%s::选项%s的参数无效\n"
+
+#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
+#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
+#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
+#, c-format
+msgid "Try \"%s --help\" for more information.\n"
+msgstr "输入 \"%s --help\" 获取更多的信息.\n"
+
+#: pg_resetxlog.c:146
+#, c-format
+msgid "%s: transaction ID epoch (-e) must not be -1\n"
+msgstr "%s: 事务ID epoch(-e) 不能为 -1\n"
+
+#: pg_resetxlog.c:161
+#, c-format
+msgid "%s: transaction ID (-x) must not be 0\n"
+msgstr "%s: 事务 ID (-x) 不能为 0\n"
+
+#: pg_resetxlog.c:185 pg_resetxlog.c:192
+#, c-format
+msgid ""
+"%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
+msgstr "%s:事务ID (-c) 必须是0或者大于等于2\n"
+
+#: pg_resetxlog.c:207
+#, c-format
+msgid "%s: OID (-o) must not be 0\n"
+msgstr "%s: OID (-o) 不能为 0\n"
+
+#: pg_resetxlog.c:230
+#, c-format
+msgid "%s: multitransaction ID (-m) must not be 0\n"
+msgstr "%s: 多事务 ID (-m) 不能为 0\n"
+
+#: pg_resetxlog.c:240
+#, c-format
+msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
+msgstr "%s: 最老的多事务 ID (-m) 不能为 0\n"
+
+#: pg_resetxlog.c:256
+#, c-format
+msgid "%s: multitransaction offset (-O) must not be -1\n"
+msgstr "%s: 多事务 偏移 (-O) 不能为-1\n"
+
+#: pg_resetxlog.c:283
+#, c-format
+msgid "%s: too many command-line arguments (first is \"%s\")\n"
+msgstr "%s: 命令行参数太多 (第一个是 \"%s\")\n"
+
+#: pg_resetxlog.c:292
+#, c-format
+msgid "%s: no data directory specified\n"
+msgstr "%s: 没有指定数据目录\n"
+
+#: pg_resetxlog.c:306
+#, c-format
+msgid "%s: cannot be executed by \"root\"\n"
+msgstr "%s:不能由\"root\"执行\n"
+
+#: pg_resetxlog.c:308
+#, c-format
+msgid "You must run %s as the PostgreSQL superuser.\n"
+msgstr "您现在作为PostgreSQL超级用户运行%s.\n"
+
+# command.c:256
+#: pg_resetxlog.c:318
+#, c-format
+msgid "%s: could not change directory to \"%s\": %s\n"
+msgstr "%s: 无法切换目录至 \"%s\": %s\n"
+
+#: pg_resetxlog.c:331 pg_resetxlog.c:477
+#, c-format
+msgid "%s: could not open file \"%s\" for reading: %s\n"
+msgstr "%s: 无法打开文件 \"%s\" 读取信息: %s\n"
+
+#: pg_resetxlog.c:338
+#, c-format
+msgid ""
+"%s: lock file \"%s\" exists\n"
+"Is a server running? If not, delete the lock file and try again.\n"
+msgstr ""
+"%s: 锁文件 \"%s\" 已经存在\n"
+"是否有一个服务正在运行? 如果没有, 删除那个锁文件然后再试一次.\n"
+
+#: pg_resetxlog.c:425
+#, c-format
+msgid ""
+"\n"
+"If these values seem acceptable, use -f to force reset.\n"
+msgstr ""
+"\n"
+"如果这些值可接受, 用 -f 强制重置.\n"
+
+#: pg_resetxlog.c:437
+#, c-format
+msgid ""
+"The database server was not shut down cleanly.\n"
+"Resetting the transaction log might cause data to be lost.\n"
+"If you want to proceed anyway, use -f to force reset.\n"
+msgstr ""
+"数据库服务器没有彻底关闭.\n"
+"重置事务日志有可能会引起丢失数据.\n"
+"如果你仍想继续, 用 -f 强制重置.\n"
+
+#: pg_resetxlog.c:451
+#, c-format
+msgid "Transaction log reset\n"
+msgstr "事务日志重置\n"
+
+#: pg_resetxlog.c:480
+#, c-format
+msgid ""
+"If you are sure the data directory path is correct, execute\n"
+" touch %s\n"
+"and try again.\n"
+msgstr ""
+"如果你确定数据目录路径是正确的, 运行\n"
+" touch %s\n"
+"然后再试一次.\n"
+
+#: pg_resetxlog.c:493
+#, c-format
+msgid "%s: could not read file \"%s\": %s\n"
+msgstr "%s: 无法读取文件 \"%s\": %s\n"
+
+#: pg_resetxlog.c:516
+#, c-format
+msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
+msgstr "%s: pg_control 已经存在, 但有无效的CRC; 带有警告的继续运行\n"
+
+#: pg_resetxlog.c:525
+#, c-format
+msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
+msgstr "%s: pg_control 已经存在, 但已破坏或无效版本; 忽略它\n"
+
+#: pg_resetxlog.c:628
+#, c-format
+msgid ""
+"Guessed pg_control values:\n"
+"\n"
+msgstr ""
+"猜测的 pg_control 值:\n"
+"\n"
+
+#: pg_resetxlog.c:630
+#, c-format
+msgid ""
+"Current pg_control values:\n"
+"\n"
+msgstr ""
+"当前的 pg_control 值:\n"
+"\n"
+
+#: pg_resetxlog.c:639
+#, c-format
+msgid "pg_control version number: %u\n"
+msgstr "pg_control 版本: %u\n"
+
+#: pg_resetxlog.c:641
+#, c-format
+msgid "Catalog version number: %u\n"
+msgstr "Catalog 版本: %u\n"
+
+#: pg_resetxlog.c:643
+#, c-format
+msgid "Database system identifier: %s\n"
+msgstr "数据库系统标识符: %s\n"
+
+#: pg_resetxlog.c:645
+#, c-format
+msgid "Latest checkpoint's TimeLineID: %u\n"
+msgstr "最新检查点的 TimeLineID: %u\n"
+
+#: pg_resetxlog.c:647
+#, c-format
+msgid "Latest checkpoint's full_page_writes: %s\n"
+msgstr "最新检查点的full_page_writes: %s\n"
+
+# help.c:48
+#: pg_resetxlog.c:648
+msgid "off"
+msgstr "关闭"
+
+# help.c:48
+#: pg_resetxlog.c:648
+msgid "on"
+msgstr "开启"
+
+#: pg_resetxlog.c:649
+#, c-format
+#| msgid "Latest checkpoint's NextXID: %u/%u\n"
+msgid "Latest checkpoint's NextXID: %u:%u\n"
+msgstr "最新检查点的NextXID: %u:%u\n"
+
+#: pg_resetxlog.c:652
+#, c-format
+msgid "Latest checkpoint's NextOID: %u\n"
+msgstr "最新检查点的 NextOID: %u\n"
+
+#: pg_resetxlog.c:654
+#, c-format
+msgid "Latest checkpoint's NextMultiXactId: %u\n"
+msgstr "最新检查点的 NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:656
+#, c-format
+msgid "Latest checkpoint's NextMultiOffset: %u\n"
+msgstr "最新检查点的 NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:658
+#, c-format
+msgid "Latest checkpoint's oldestXID: %u\n"
+msgstr "最新检查点的oldestXID: %u\n"
+
+#: pg_resetxlog.c:660
+#, c-format
+msgid "Latest checkpoint's oldestXID's DB: %u\n"
+msgstr "最新检查点的oldestXID所在的数据库: %u\n"
+
+#: pg_resetxlog.c:662
+#, c-format
+msgid "Latest checkpoint's oldestActiveXID: %u\n"
+msgstr "最新检查点的oldestActiveXID: %u\n"
+
+#: pg_resetxlog.c:664
+#, c-format
+msgid "Latest checkpoint's oldestMultiXid: %u\n"
+msgstr "最新检查点的oldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:666
+#, c-format
+msgid "Latest checkpoint's oldestMulti's DB: %u\n"
+msgstr "最新检查点的oldestMulti所在的数据库: %u\n"
+
+#: pg_resetxlog.c:668
+#, c-format
+#| msgid "Latest checkpoint's oldestCommitTs: %u\n"
+msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
+msgstr "最新检查点的oldestCommitTsXid:%u\n"
+
+#: pg_resetxlog.c:670
+#, c-format
+#| msgid "Latest checkpoint's newestCommitTs: %u\n"
+msgid "Latest checkpoint's newestCommitTsXid:%u\n"
+msgstr "最新检查点的newestCommitTsXid:%u\n"
+
+#: pg_resetxlog.c:672
+#, c-format
+msgid "Maximum data alignment: %u\n"
+msgstr "最大的数据校准: %u\n"
+
+#: pg_resetxlog.c:675
+#, c-format
+msgid "Database block size: %u\n"
+msgstr "数据库块大小: %u\n"
+
+#: pg_resetxlog.c:677
+#, c-format
+msgid "Blocks per segment of large relation: %u\n"
+msgstr "大关系的每段块数: %u\n"
+
+#: pg_resetxlog.c:679
+#, c-format
+msgid "WAL block size: %u\n"
+msgstr "WAL块大小: %u\n"
+
+#: pg_resetxlog.c:681
+#, c-format
+msgid "Bytes per WAL segment: %u\n"
+msgstr "每一个 WAL 段字节数: %u\n"
+
+#: pg_resetxlog.c:683
+#, c-format
+msgid "Maximum length of identifiers: %u\n"
+msgstr "标示符的最大长度: %u\n"
+
+#: pg_resetxlog.c:685
+#, c-format
+msgid "Maximum columns in an index: %u\n"
+msgstr "在索引中最多可用的列数: %u\n"
+
+#: pg_resetxlog.c:687
+#, c-format
+msgid "Maximum size of a TOAST chunk: %u\n"
+msgstr "一个TOAST区块的最大空间: %u\n"
+
+#: pg_resetxlog.c:689
+#, c-format
+msgid "Size of a large-object chunk: %u\n"
+msgstr "一个大对象区块的大小: %u\n"
+
+#: pg_resetxlog.c:691
+#, c-format
+msgid "Date/time type storage: %s\n"
+msgstr "日期/时间类型存储: %s\n"
+
+#: pg_resetxlog.c:692
+msgid "64-bit integers"
+msgstr "64位整型"
+
+#: pg_resetxlog.c:692
+msgid "floating-point numbers"
+msgstr "浮点数"
+
+#: pg_resetxlog.c:693
+#, c-format
+msgid "Float4 argument passing: %s\n"
+msgstr "正在传递Float4类型的参数: %s\n"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by reference"
+msgstr "由引用"
+
+#: pg_resetxlog.c:694 pg_resetxlog.c:696
+msgid "by value"
+msgstr "由值"
+
+#: pg_resetxlog.c:695
+#, c-format
+msgid "Float8 argument passing: %s\n"
+msgstr "正在传递Float8类型的参数: %s\n"
+
+#: pg_resetxlog.c:697
+#, c-format
+msgid "Data page checksum version: %u\n"
+msgstr "数据页检验和版本: %u\n"
+
+#: pg_resetxlog.c:711
+#, c-format
+msgid ""
+"\n"
+"\n"
+"Values to be changed:\n"
+"\n"
+msgstr ""
+"\n"
+"\n"
+"将被改变的值:\n"
+"\n"
+
+#: pg_resetxlog.c:714
+#, c-format
+msgid "First log segment after reset: %s\n"
+msgstr "重置后的第一个日志段: %s\n"
+
+#: pg_resetxlog.c:718
+#, c-format
+msgid "NextMultiXactId: %u\n"
+msgstr "下一个MultiXactId值NextMultiXactId: %u\n"
+
+#: pg_resetxlog.c:720
+#, c-format
+msgid "OldestMultiXid: %u\n"
+msgstr "最老的MultiXid值OldestMultiXid: %u\n"
+
+#: pg_resetxlog.c:722
+#, c-format
+msgid "OldestMulti's DB: %u\n"
+msgstr "最老的MultiXid对应的DB: %u\n"
+
+#: pg_resetxlog.c:728
+#, c-format
+msgid "NextMultiOffset: %u\n"
+msgstr "下一个偏移NextMultiOffset: %u\n"
+
+#: pg_resetxlog.c:734
+#, c-format
+msgid "NextOID: %u\n"
+msgstr "NextOID: %u\n"
+
+#: pg_resetxlog.c:740
+#, c-format
+msgid "NextXID: %u\n"
+msgstr "NextXID: %u\n"
+
+#: pg_resetxlog.c:742
+#, c-format
+msgid "OldestXID: %u\n"
+msgstr "OldestXID: %u\n"
+
+#: pg_resetxlog.c:744
+#, c-format
+msgid "OldestXID's DB: %u\n"
+msgstr "OldestXID's DB: %u\n"
+
+#: pg_resetxlog.c:750
+#, c-format
+msgid "NextXID epoch: %u\n"
+msgstr "NextXID 末端: %u\n"
+
+#: pg_resetxlog.c:756
+#, c-format
+#| msgid "oldestCommitTs: %u\n"
+msgid "oldestCommitTsXid: %u\n"
+msgstr "oldestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:761
+#, c-format
+#| msgid "newestCommitTs: %u\n"
+msgid "newestCommitTsXid: %u\n"
+msgstr "newestCommitTsXid: %u\n"
+
+#: pg_resetxlog.c:827
+#, c-format
+msgid ""
+"%s: internal error -- sizeof(ControlFileData) is too large ... fix "
+"PG_CONTROL_SIZE\n"
+msgstr "%s: 内部错误 -- sizeof(ControlFileData) 太大 ... 修复 xlog.c\n"
+
+#: pg_resetxlog.c:842
+#, c-format
+msgid "%s: could not create pg_control file: %s\n"
+msgstr "%s: 无法创建 pg_control 文件: %s\n"
+
+#: pg_resetxlog.c:853
+#, c-format
+msgid "%s: could not write pg_control file: %s\n"
+msgstr "%s: 无法写 pg_control 文件: %s\n"
+
+#: pg_resetxlog.c:860 pg_resetxlog.c:1156
+#, c-format
+msgid "%s: fsync error: %s\n"
+msgstr "%s: fsync 错误: %s\n"
+
+#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
+#, c-format
+msgid "%s: could not open directory \"%s\": %s\n"
+msgstr "%s: 无法打开目录 \"%s\": %s\n"
+
+#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
+#, c-format
+msgid "%s: could not read directory \"%s\": %s\n"
+msgstr "%s: 无法读取目录 \"%s\": %s\n"
+
+#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
+#, c-format
+msgid "%s: could not close directory \"%s\": %s\n"
+msgstr "%s: 无法关闭目录 \"%s\": %s\n"
+
+#: pg_resetxlog.c:984 pg_resetxlog.c:1038
+#, c-format
+msgid "%s: could not delete file \"%s\": %s\n"
+msgstr "%s: 无法删除文件 \"%s\": %s\n"
+
+#: pg_resetxlog.c:1123
+#, c-format
+msgid "%s: could not open file \"%s\": %s\n"
+msgstr "%s: 无法打开文件 \"%s\": %s\n"
+
+#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
+#, c-format
+msgid "%s: could not write file \"%s\": %s\n"
+msgstr "%s: 无法写文件 \"%s\": %s\n"
+
+#: pg_resetxlog.c:1167
+#, c-format
+msgid ""
+"%s resets the PostgreSQL transaction log.\n"
+"\n"
+msgstr ""
+"%s 重置 PostgreSQL 事务日志.\n"
+"\n"
+
+#: pg_resetxlog.c:1168
+#, c-format
+msgid ""
+"Usage:\n"
+" %s [OPTION]... DATADIR\n"
+"\n"
+msgstr ""
+"使用方法:\n"
+" %s [选项]... 数据目录\n"
+"\n"
+
+#: pg_resetxlog.c:1169
+#, c-format
+msgid "Options:\n"
+msgstr "选项:\n"
+
+#: pg_resetxlog.c:1170
+#, c-format
+msgid ""
+" -c XID,XID set oldest and newest transactions bearing commit "
+"timestamp\n"
+msgstr " -c XID,XID 设置承受提交时间戳的最旧和最新事务\n"
+
+#: pg_resetxlog.c:1171
+#, c-format
+msgid " (zero in either value means no change)\n"
+msgstr " (任一值中的零表示没有改变)\n"
+
+#: pg_resetxlog.c:1172
+#, c-format
+msgid " [-D] DATADIR data directory\n"
+msgstr " [-D] DATADIR 数据目录\n"
+
+#: pg_resetxlog.c:1173
+#, c-format
+msgid " -e XIDEPOCH set next transaction ID epoch\n"
+msgstr " -e XIDEPOCH 设置下一个事务ID时间单元(epoch)\n"
+
+#: pg_resetxlog.c:1174
+#, c-format
+msgid " -f force update to be done\n"
+msgstr " -f 强制更新\n"
+
+#: pg_resetxlog.c:1175
+#, c-format
+msgid ""
+" -l XLOGFILE force minimum WAL starting location for new transaction "
+"log\n"
+msgstr " -l XLOGFILE 为新的事务日志强制使用最小WAL日志起始位置\n"
+
+#: pg_resetxlog.c:1176
+#, c-format
+msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
+msgstr " -m MXID,MXID 设置下一个事务和最老的事务ID\n"
+
+#: pg_resetxlog.c:1177
+#, c-format
+msgid ""
+" -n no update, just show what would be done (for testing)\n"
+msgstr " -n 未更新, 只显示将要做什么 (测试用途)\n"
+
+#: pg_resetxlog.c:1178
+#, c-format
+msgid " -o OID set next OID\n"
+msgstr " -o OID 设置下一个 OID\n"
+
+#: pg_resetxlog.c:1179
+#, c-format
+msgid " -O OFFSET set next multitransaction offset\n"
+msgstr " -O OFFSET 设置下一个多事务(multitransaction)偏移\n"
+
+#: pg_resetxlog.c:1180
+#, c-format
+msgid " -V, --version output version information, then exit\n"
+msgstr " -V, --version 输出版本信息,然后退出\n"
+
+#: pg_resetxlog.c:1181
+#, c-format
+msgid " -x XID set next transaction ID\n"
+msgstr " -x XID 设置下一个事务 ID\n"
+
+#: pg_resetxlog.c:1182
+#, c-format
+msgid " -?, --help show this help, then exit\n"
+msgstr " -?, --help 显示帮助信息,然后退出\n"
+
+#: pg_resetxlog.c:1183
+#, c-format
+msgid ""
+"\n"
+"Report bugs to .\n"
+msgstr ""
+"\n"
+"报告错误至 .\n"
+
+#~ msgid "%s: invalid argument for option -x\n"
+#~ msgstr "%s: 为 -x 选项的无效参数\n"
+
+#~ msgid "%s: invalid argument for option -o\n"
+#~ msgstr "%s: 为 -o 选项的无效参数\n"
+
+#~ msgid "%s: invalid argument for option -m\n"
+#~ msgstr "%s: 对于选项-m 参数无效\n"
+
+#~ msgid "%s: invalid argument for option -O\n"
+#~ msgstr "%s: 对于选项-O 参数无效\n"
+
+#~ msgid "%s: invalid argument for option -l\n"
+#~ msgstr "%s: 为 -l 选项的无效参数\n"
+
+#~ msgid "First log file ID after reset: %u\n"
+#~ msgstr "重置后的第一个日志文件ID: %u\n"
+
+#~ msgid "%s: invalid argument for -o option\n"
+#~ msgstr "%s: 为 -o 选项的无效参数\n"
+
+#~ msgid "%s: invalid argument for -x option\n"
+#~ msgstr "%s: 为 -x 选项的无效参数\n"
+
+#~ msgid "Latest checkpoint's StartUpID: %u\n"
+#~ msgstr "最新检查点的 StartUpID: %u\n"
+
+#~ msgid "Maximum number of function arguments: %u\n"
+#~ msgstr "函数参数的最大个数: %u\n"
+
+#~ msgid "%s: invalid LC_CTYPE setting\n"
+#~ msgstr "%s: 无效的 LC_CTYPE 设置\n"
+
+#~ msgid "%s: invalid LC_COLLATE setting\n"
+#~ msgstr "%s: 无效的 LC_COLLATE 设置\n"
+
+#~ msgid " --version output version information, then exit\n"
+#~ msgstr " --version 输出版本信息, 然后退出\n"
+
+#~ msgid " --help show this help, then exit\n"
+#~ msgstr " --help 显示此帮助信息, 然后退出\n"
+
+#~ msgid "%s: could not read from directory \"%s\": %s\n"
+#~ msgstr "%s: 无法从目录 \"%s\" 中读取: %s\n"
diff --git a/src/bin/pg_resetxlog/.gitignore b/src/bin/pg_resetxlog/.gitignore
deleted file mode 100644
index 6b84208ee0..0000000000
--- a/src/bin/pg_resetxlog/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-/pg_resetxlog
diff --git a/src/bin/pg_resetxlog/Makefile b/src/bin/pg_resetxlog/Makefile
deleted file mode 100644
index e04892479b..0000000000
--- a/src/bin/pg_resetxlog/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-#-------------------------------------------------------------------------
-#
-# Makefile for src/bin/pg_resetxlog
-#
-# Copyright (c) 1998-2017, PostgreSQL Global Development Group
-#
-# src/bin/pg_resetxlog/Makefile
-#
-#-------------------------------------------------------------------------
-
-PGFILEDESC = "pg_resetxlog - reset PostgreSQL WAL log"
-PGAPPICON=win32
-
-subdir = src/bin/pg_resetxlog
-top_builddir = ../../..
-include $(top_builddir)/src/Makefile.global
-
-OBJS= pg_resetxlog.o $(WIN32RES)
-
-all: pg_resetxlog
-
-pg_resetxlog: $(OBJS) | submake-libpgport
- $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
-
-install: all installdirs
- $(INSTALL_PROGRAM) pg_resetxlog$(X) '$(DESTDIR)$(bindir)/pg_resetxlog$(X)'
-
-installdirs:
- $(MKDIR_P) '$(DESTDIR)$(bindir)'
-
-uninstall:
- rm -f '$(DESTDIR)$(bindir)/pg_resetxlog$(X)'
-
-clean distclean maintainer-clean:
- rm -f pg_resetxlog$(X) $(OBJS)
diff --git a/src/bin/pg_resetxlog/nls.mk b/src/bin/pg_resetxlog/nls.mk
deleted file mode 100644
index ce18674d4b..0000000000
--- a/src/bin/pg_resetxlog/nls.mk
+++ /dev/null
@@ -1,4 +0,0 @@
-# src/bin/pg_resetxlog/nls.mk
-CATALOG_NAME = pg_resetxlog
-AVAIL_LANGUAGES = cs de es fr it ja ko pl pt_BR ru sv zh_CN
-GETTEXT_FILES = pg_resetxlog.c ../../common/restricted_token.c
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
deleted file mode 100644
index 963802efc8..0000000000
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * pg_resetxlog.c
- * A utility to "zero out" the xlog when it's corrupt beyond recovery.
- * Can also rebuild pg_control if needed.
- *
- * The theory of operation is fairly simple:
- * 1. Read the existing pg_control (which will include the last
- * checkpoint record). If it is an old format then update to
- * current format.
- * 2. If pg_control is corrupt, attempt to intuit reasonable values,
- * by scanning the old xlog if necessary.
- * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
- * record at the start of xlog.
- * 4. Flush the existing xlog files and write a new segment with
- * just a checkpoint record in it. The new segment is positioned
- * just past the end of the old xlog, so that existing LSNs in
- * data pages will appear to be "in the past".
- * This is all pretty straightforward except for the intuition part of
- * step 2 ...
- *
- *
- * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * src/bin/pg_resetxlog/pg_resetxlog.c
- *
- *-------------------------------------------------------------------------
- */
-
-/*
- * We have to use postgres.h not postgres_fe.h here, because there's so much
- * backend-only stuff in the XLOG include files we need. But we need a
- * frontend-ish environment otherwise. Hence this ugly hack.
- */
-#define FRONTEND 1
-
-#include "postgres.h"
-
-#include <dirent.h>
-#include <fcntl.h>
-#include <locale.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <time.h>
-#include <unistd.h>
-
-#include "access/transam.h"
-#include "access/tuptoaster.h"
-#include "access/multixact.h"
-#include "access/xlog.h"
-#include "access/xlog_internal.h"
-#include "catalog/catversion.h"
-#include "catalog/pg_control.h"
-#include "common/fe_memutils.h"
-#include "common/restricted_token.h"
-#include "storage/large_object.h"
-#include "pg_getopt.h"
-
-
-static ControlFileData ControlFile; /* pg_control values */
-static XLogSegNo newXlogSegNo; /* new XLOG segment # */
-static bool guessed = false; /* T if we had to guess at any values */
-static const char *progname;
-static uint32 set_xid_epoch = (uint32) -1;
-static TransactionId set_xid = 0;
-static TransactionId set_oldest_commit_ts_xid = 0;
-static TransactionId set_newest_commit_ts_xid = 0;
-static Oid set_oid = 0;
-static MultiXactId set_mxid = 0;
-static MultiXactOffset set_mxoff = (MultiXactOffset) -1;
-static uint32 minXlogTli = 0;
-static XLogSegNo minXlogSegNo = 0;
-
-static bool ReadControlFile(void);
-static void GuessControlValues(void);
-static void PrintControlValues(bool guessed);
-static void PrintNewControlValues(void);
-static void RewriteControlFile(void);
-static void FindEndOfXLOG(void);
-static void KillExistingXLOG(void);
-static void KillExistingArchiveStatus(void);
-static void WriteEmptyXLOG(void);
-static void usage(void);
-
-
-int
-main(int argc, char *argv[])
-{
- int c;
- bool force = false;
- bool noupdate = false;
- MultiXactId set_oldestmxid = 0;
- char *endptr;
- char *endptr2;
- char *DataDir = NULL;
- int fd;
-
- set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetxlog"));
-
- progname = get_progname(argv[0]);
-
- if (argc > 1)
- {
- if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
- {
- usage();
- exit(0);
- }
- if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
- {
- puts("pg_resetxlog (PostgreSQL) " PG_VERSION);
- exit(0);
- }
- }
-
-
- while ((c = getopt(argc, argv, "c:D:e:fl:m:no:O:x:")) != -1)
- {
- switch (c)
- {
- case 'D':
- DataDir = optarg;
- break;
-
- case 'f':
- force = true;
- break;
-
- case 'n':
- noupdate = true;
- break;
-
- case 'e':
- set_xid_epoch = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0')
- {
- /*------
- translator: the second %s is a command line argument (-e, etc) */
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-e");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- if (set_xid_epoch == -1)
- {
- fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname);
- exit(1);
- }
- break;
-
- case 'x':
- set_xid = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-x");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- if (set_xid == 0)
- {
- fprintf(stderr, _("%s: transaction ID (-x) must not be 0\n"), progname);
- exit(1);
- }
- break;
-
- case 'c':
- set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != ',')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
- if (endptr2 == endptr + 1 || *endptr2 != '\0')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
-
- if (set_oldest_commit_ts_xid < 2 &&
- set_oldest_commit_ts_xid != 0)
- {
- fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
- exit(1);
- }
-
- if (set_newest_commit_ts_xid < 2 &&
- set_newest_commit_ts_xid != 0)
- {
- fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
- exit(1);
- }
- break;
-
- case 'o':
- set_oid = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-o");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- if (set_oid == 0)
- {
- fprintf(stderr, _("%s: OID (-o) must not be 0\n"), progname);
- exit(1);
- }
- break;
-
- case 'm':
- set_mxid = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != ',')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
-
- set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
- if (endptr2 == endptr + 1 || *endptr2 != '\0')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- if (set_mxid == 0)
- {
- fprintf(stderr, _("%s: multitransaction ID (-m) must not be 0\n"), progname);
- exit(1);
- }
-
- /*
- * XXX It'd be nice to have more sanity checks here, e.g. so
- * that oldest is not wrapped around w.r.t. nextMulti.
- */
- if (set_oldestmxid == 0)
- {
- fprintf(stderr, _("%s: oldest multitransaction ID (-m) must not be 0\n"),
- progname);
- exit(1);
- }
- break;
-
- case 'O':
- set_mxoff = strtoul(optarg, &endptr, 0);
- if (endptr == optarg || *endptr != '\0')
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-O");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- if (set_mxoff == -1)
- {
- fprintf(stderr, _("%s: multitransaction offset (-O) must not be -1\n"), progname);
- exit(1);
- }
- break;
-
- case 'l':
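-				/* WAL file names are 24 hex digits: 8 each for timeline, log, and segment */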
- if (strspn(optarg, "01234567890ABCDEFabcdef") != XLOG_FNAME_LEN)
- {
- fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-l");
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- XLogFromFileName(optarg, &minXlogTli, &minXlogSegNo);
- break;
-
- default:
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
- }
-
- if (DataDir == NULL && optind < argc)
- DataDir = argv[optind++];
-
- /* Complain if any arguments remain */
- if (optind < argc)
- {
- fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"),
- progname, argv[optind]);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
- progname);
- exit(1);
- }
-
- if (DataDir == NULL)
- {
- fprintf(stderr, _("%s: no data directory specified\n"), progname);
- fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
- exit(1);
- }
-
- /*
- * Don't allow pg_resetxlog to be run as root, to avoid overwriting the
- * ownership of files in the data directory. We need only check for root
- * -- any other user won't have sufficient permissions to modify files in
- * the data directory.
- */
-#ifndef WIN32
- if (geteuid() == 0)
- {
- fprintf(stderr, _("%s: cannot be executed by \"root\"\n"),
- progname);
- fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
- progname);
- exit(1);
- }
-#endif
-
- get_restricted_token(progname);
-
- if (chdir(DataDir) < 0)
- {
- fprintf(stderr, _("%s: could not change directory to \"%s\": %s\n"),
- progname, DataDir, strerror(errno));
- exit(1);
- }
-
- /*
- * Check for a postmaster lock file --- if there is one, refuse to
- * proceed, on grounds we might be interfering with a live installation.
- */
- if ((fd = open("postmaster.pid", O_RDONLY, 0)) < 0)
- {
- if (errno != ENOENT)
- {
- fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
- progname, "postmaster.pid", strerror(errno));
- exit(1);
- }
- }
- else
- {
- fprintf(stderr, _("%s: lock file \"%s\" exists\n"
- "Is a server running? If not, delete the lock file and try again.\n"),
- progname, "postmaster.pid");
- exit(1);
- }
-
- /*
- * Attempt to read the existing pg_control file
- */
- if (!ReadControlFile())
- GuessControlValues();
-
- /*
- * Also look at existing segment files to set up newXlogSegNo
- */
- FindEndOfXLOG();
-
- /*
- * If we're not going to proceed with the reset, print the current control
- * file parameters.
- */
- if ((guessed && !force) || noupdate)
- PrintControlValues(guessed);
-
- /*
- * Adjust fields if required by switches. (Do this now so that printout,
- * if any, includes these values.)
- */
- if (set_xid_epoch != -1)
- ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch;
-
- if (set_xid != 0)
- {
- ControlFile.checkPointCopy.nextXid = set_xid;
-
- /*
- * For the moment, just set oldestXid to a value that will force
- * immediate autovacuum-for-wraparound. It's not clear whether adding
- * user control of this is useful, so let's just do something that's
- * reasonably safe. The magic constant here corresponds to the
- * maximum allowed value of autovacuum_freeze_max_age.
- */
- ControlFile.checkPointCopy.oldestXid = set_xid - 2000000000;
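-		/* if the subtraction wrapped below FirstNormalTransactionId, skip past the reserved special XIDs */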
- if (ControlFile.checkPointCopy.oldestXid < FirstNormalTransactionId)
- ControlFile.checkPointCopy.oldestXid += FirstNormalTransactionId;
- ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
- }
-
- if (set_oldest_commit_ts_xid != 0)
- ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
- if (set_newest_commit_ts_xid != 0)
- ControlFile.checkPointCopy.newestCommitTsXid = set_newest_commit_ts_xid;
-
- if (set_oid != 0)
- ControlFile.checkPointCopy.nextOid = set_oid;
-
- if (set_mxid != 0)
- {
- ControlFile.checkPointCopy.nextMulti = set_mxid;
-
- ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
- if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
- ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
- ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
- }
-
- if (set_mxoff != -1)
- ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
-
- if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
- {
- ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
- ControlFile.checkPointCopy.PrevTimeLineID = minXlogTli;
- }
-
- if (minXlogSegNo > newXlogSegNo)
- newXlogSegNo = minXlogSegNo;
-
- /*
- * If we had to guess anything, and -f was not given, just print the
- * guessed values and exit. Also print if -n is given.
- */
- if ((guessed && !force) || noupdate)
- {
- PrintNewControlValues();
- if (!noupdate)
- {
- printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
- exit(1);
- }
- else
- exit(0);
- }
-
- /*
- * Don't reset from a dirty pg_control without -f, either.
- */
- if (ControlFile.state != DB_SHUTDOWNED && !force)
- {
- printf(_("The database server was not shut down cleanly.\n"
- "Resetting the transaction log might cause data to be lost.\n"
- "If you want to proceed anyway, use -f to force reset.\n"));
- exit(1);
- }
-
- /*
- * Else, do the dirty deed.
- */
- RewriteControlFile();
- KillExistingXLOG();
- KillExistingArchiveStatus();
- WriteEmptyXLOG();
-
- printf(_("Transaction log reset\n"));
- return 0;
-}
-
-
-/*
- * Try to read the existing pg_control file.
- *
- * This routine is also responsible for updating old pg_control versions
- * to the current format. (Currently we don't do anything of the sort.)
- */
-static bool
-ReadControlFile(void)
-{
- int fd;
- int len;
- char *buffer;
- pg_crc32c crc;
-
- if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0)
- {
- /*
- * If pg_control is not there at all, or we can't read it, the odds
- * are we've been handed a bad DataDir path, so give up. User can do
- * "touch pg_control" to force us to proceed.
- */
- fprintf(stderr, _("%s: could not open file \"%s\" for reading: %s\n"),
- progname, XLOG_CONTROL_FILE, strerror(errno));
- if (errno == ENOENT)
- fprintf(stderr, _("If you are sure the data directory path is correct, execute\n"
- " touch %s\n"
- "and try again.\n"),
- XLOG_CONTROL_FILE);
- exit(1);
- }
-
- /* Use malloc to ensure we have a maxaligned buffer */
- buffer = (char *) pg_malloc(PG_CONTROL_SIZE);
-
- len = read(fd, buffer, PG_CONTROL_SIZE);
- if (len < 0)
- {
- fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
- progname, XLOG_CONTROL_FILE, strerror(errno));
- exit(1);
- }
- close(fd);
-
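-	/* only trust the contents if the file is complete and claims the current version */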
- if (len >= sizeof(ControlFileData) &&
- ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
- {
- /* Check the CRC. */
- INIT_CRC32C(crc);
- COMP_CRC32C(crc,
- buffer,
- offsetof(ControlFileData, crc));
- FIN_CRC32C(crc);
-
- if (EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc))
- {
- /* Valid data... */
- memcpy(&ControlFile, buffer, sizeof(ControlFile));
- return true;
- }
-
- fprintf(stderr, _("%s: pg_control exists but has invalid CRC; proceed with caution\n"),
- progname);
- /* We will use the data anyway, but treat it as guessed. */
- memcpy(&ControlFile, buffer, sizeof(ControlFile));
- guessed = true;
- return true;
- }
-
- /* Looks like it's a mess. */
- fprintf(stderr, _("%s: pg_control exists but is broken or unknown version; ignoring it\n"),
- progname);
- return false;
-}
-
-
-/*
- * Guess at pg_control values when we can't read the old ones.
- */
-static void
-GuessControlValues(void)
-{
- uint64 sysidentifier;
- struct timeval tv;
-
- /*
- * Set up a completely default set of pg_control values.
- */
- guessed = true;
- memset(&ControlFile, 0, sizeof(ControlFile));
-
- ControlFile.pg_control_version = PG_CONTROL_VERSION;
- ControlFile.catalog_version_no = CATALOG_VERSION_NO;
-
- /*
- * Create a new unique installation identifier, since we can no longer use
- * any old XLOG records. See notes in xlog.c about the algorithm.
- */
- gettimeofday(&tv, NULL);
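-	/* seconds in the high 32 bits, microseconds in the next 20, low 12 bits of the PID */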
- sysidentifier = ((uint64) tv.tv_sec) << 32;
- sysidentifier |= ((uint64) tv.tv_usec) << 12;
- sysidentifier |= getpid() & 0xFFF;
-
- ControlFile.system_identifier = sysidentifier;
-
- ControlFile.checkPointCopy.redo = SizeOfXLogLongPHD;
- ControlFile.checkPointCopy.ThisTimeLineID = 1;
- ControlFile.checkPointCopy.PrevTimeLineID = 1;
- ControlFile.checkPointCopy.fullPageWrites = false;
- ControlFile.checkPointCopy.nextXidEpoch = 0;
- ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId;
- ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
- ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
- ControlFile.checkPointCopy.nextMultiOffset = 0;
- ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
- ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
- ControlFile.checkPointCopy.oldestMulti = FirstMultiXactId;
- ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
- ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
- ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
-
- ControlFile.state = DB_SHUTDOWNED;
- ControlFile.time = (pg_time_t) time(NULL);
- ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
- ControlFile.unloggedLSN = 1;
-
- /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
-
- ControlFile.wal_level = WAL_LEVEL_MINIMAL;
- ControlFile.wal_log_hints = false;
- ControlFile.track_commit_timestamp = false;
- ControlFile.MaxConnections = 100;
- ControlFile.max_worker_processes = 8;
- ControlFile.max_prepared_xacts = 0;
- ControlFile.max_locks_per_xact = 64;
-
- ControlFile.maxAlign = MAXIMUM_ALIGNOF;
- ControlFile.floatFormat = FLOATFORMAT_VALUE;
- ControlFile.blcksz = BLCKSZ;
- ControlFile.relseg_size = RELSEG_SIZE;
- ControlFile.xlog_blcksz = XLOG_BLCKSZ;
- ControlFile.xlog_seg_size = XLOG_SEG_SIZE;
- ControlFile.nameDataLen = NAMEDATALEN;
- ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
- ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
- ControlFile.loblksize = LOBLKSIZE;
-#ifdef HAVE_INT64_TIMESTAMP
- ControlFile.enableIntTimes = true;
-#else
- ControlFile.enableIntTimes = false;
-#endif
- ControlFile.float4ByVal = FLOAT4PASSBYVAL;
- ControlFile.float8ByVal = FLOAT8PASSBYVAL;
-
- /*
- * XXX eventually, should try to grovel through old XLOG to develop more
- * accurate values for TimeLineID, nextXID, etc.
- */
-}
-
-
-/*
- * Print the guessed pg_control values when we had to guess.
- *
- * NB: this display should be just those fields that will not be
- * reset by RewriteControlFile().
- */
-static void
-PrintControlValues(bool guessed)
-{
- char sysident_str[32];
-
- if (guessed)
- printf(_("Guessed pg_control values:\n\n"));
- else
- printf(_("Current pg_control values:\n\n"));
-
- /*
- * Format system_identifier separately to keep platform-dependent format
- * code out of the translatable message string.
- */
- snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
- ControlFile.system_identifier);
-
- printf(_("pg_control version number: %u\n"),
- ControlFile.pg_control_version);
- printf(_("Catalog version number: %u\n"),
- ControlFile.catalog_version_no);
- printf(_("Database system identifier: %s\n"),
- sysident_str);
- printf(_("Latest checkpoint's TimeLineID: %u\n"),
- ControlFile.checkPointCopy.ThisTimeLineID);
- printf(_("Latest checkpoint's full_page_writes: %s\n"),
- ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
- printf(_("Latest checkpoint's NextXID: %u:%u\n"),
- ControlFile.checkPointCopy.nextXidEpoch,
- ControlFile.checkPointCopy.nextXid);
- printf(_("Latest checkpoint's NextOID: %u\n"),
- ControlFile.checkPointCopy.nextOid);
- printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
- ControlFile.checkPointCopy.nextMulti);
- printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
- ControlFile.checkPointCopy.nextMultiOffset);
- printf(_("Latest checkpoint's oldestXID: %u\n"),
- ControlFile.checkPointCopy.oldestXid);
- printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
- ControlFile.checkPointCopy.oldestXidDB);
- printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
- ControlFile.checkPointCopy.oldestActiveXid);
- printf(_("Latest checkpoint's oldestMultiXid: %u\n"),
- ControlFile.checkPointCopy.oldestMulti);
- printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
- ControlFile.checkPointCopy.oldestMultiDB);
- printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"),
- ControlFile.checkPointCopy.oldestCommitTsXid);
- printf(_("Latest checkpoint's newestCommitTsXid:%u\n"),
- ControlFile.checkPointCopy.newestCommitTsXid);
- printf(_("Maximum data alignment: %u\n"),
- ControlFile.maxAlign);
- /* we don't print floatFormat since can't say much useful about it */
- printf(_("Database block size: %u\n"),
- ControlFile.blcksz);
- printf(_("Blocks per segment of large relation: %u\n"),
- ControlFile.relseg_size);
- printf(_("WAL block size: %u\n"),
- ControlFile.xlog_blcksz);
- printf(_("Bytes per WAL segment: %u\n"),
- ControlFile.xlog_seg_size);
- printf(_("Maximum length of identifiers: %u\n"),
- ControlFile.nameDataLen);
- printf(_("Maximum columns in an index: %u\n"),
- ControlFile.indexMaxKeys);
- printf(_("Maximum size of a TOAST chunk: %u\n"),
- ControlFile.toast_max_chunk_size);
- printf(_("Size of a large-object chunk: %u\n"),
- ControlFile.loblksize);
- printf(_("Date/time type storage: %s\n"),
- (ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
- printf(_("Float4 argument passing: %s\n"),
- (ControlFile.float4ByVal ? _("by value") : _("by reference")));
- printf(_("Float8 argument passing: %s\n"),
- (ControlFile.float8ByVal ? _("by value") : _("by reference")));
- printf(_("Data page checksum version: %u\n"),
- ControlFile.data_checksum_version);
-}
-
-
-/*
- * Print the values to be changed.
- */
-static void
-PrintNewControlValues(void)
-{
- char fname[MAXFNAMELEN];
-
-	/* This is always printed, to keep the output format consistent. */
- printf(_("\n\nValues to be changed:\n\n"));
-
- XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
- printf(_("First log segment after reset: %s\n"), fname);
-
- if (set_mxid != 0)
- {
- printf(_("NextMultiXactId: %u\n"),
- ControlFile.checkPointCopy.nextMulti);
- printf(_("OldestMultiXid: %u\n"),
- ControlFile.checkPointCopy.oldestMulti);
- printf(_("OldestMulti's DB: %u\n"),
- ControlFile.checkPointCopy.oldestMultiDB);
- }
-
- if (set_mxoff != -1)
- {
- printf(_("NextMultiOffset: %u\n"),
- ControlFile.checkPointCopy.nextMultiOffset);
- }
-
- if (set_oid != 0)
- {
- printf(_("NextOID: %u\n"),
- ControlFile.checkPointCopy.nextOid);
- }
-
- if (set_xid != 0)
- {
- printf(_("NextXID: %u\n"),
- ControlFile.checkPointCopy.nextXid);
- printf(_("OldestXID: %u\n"),
- ControlFile.checkPointCopy.oldestXid);
- printf(_("OldestXID's DB: %u\n"),
- ControlFile.checkPointCopy.oldestXidDB);
- }
-
- if (set_xid_epoch != -1)
- {
- printf(_("NextXID epoch: %u\n"),
- ControlFile.checkPointCopy.nextXidEpoch);
- }
-
- if (set_oldest_commit_ts_xid != 0)
- {
- printf(_("oldestCommitTsXid: %u\n"),
- ControlFile.checkPointCopy.oldestCommitTsXid);
- }
- if (set_newest_commit_ts_xid != 0)
- {
- printf(_("newestCommitTsXid: %u\n"),
- ControlFile.checkPointCopy.newestCommitTsXid);
- }
-}
-
-
-/*
- * Write out the new pg_control file.
- */
-static void
-RewriteControlFile(void)
-{
- int fd;
- char buffer[PG_CONTROL_SIZE]; /* need not be aligned */
-
- /*
- * Adjust fields as needed to force an empty XLOG starting at
- * newXlogSegNo.
- */
- XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD,
- ControlFile.checkPointCopy.redo);
- ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
-
- ControlFile.state = DB_SHUTDOWNED;
- ControlFile.time = (pg_time_t) time(NULL);
- ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
- ControlFile.prevCheckPoint = 0;
- ControlFile.minRecoveryPoint = 0;
- ControlFile.minRecoveryPointTLI = 0;
- ControlFile.backupStartPoint = 0;
- ControlFile.backupEndPoint = 0;
- ControlFile.backupEndRequired = false;
-
- /*
- * Force the defaults for max_* settings. The values don't really matter
- * as long as wal_level='minimal'; the postmaster will reset these fields
- * anyway at startup.
- */
- ControlFile.wal_level = WAL_LEVEL_MINIMAL;
- ControlFile.wal_log_hints = false;
- ControlFile.track_commit_timestamp = false;
- ControlFile.MaxConnections = 100;
- ControlFile.max_worker_processes = 8;
- ControlFile.max_prepared_xacts = 0;
- ControlFile.max_locks_per_xact = 64;
-
- /* Now we can force the recorded xlog seg size to the right thing. */
- ControlFile.xlog_seg_size = XLogSegSize;
-
- /* Contents are protected with a CRC */
- INIT_CRC32C(ControlFile.crc);
- COMP_CRC32C(ControlFile.crc,
- (char *) &ControlFile,
- offsetof(ControlFileData, crc));
- FIN_CRC32C(ControlFile.crc);
-
- /*
- * We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the
- * excess over sizeof(ControlFileData). This reduces the odds of
- * premature-EOF errors when reading pg_control. We'll still fail when we
- * check the contents of the file, but hopefully with a more specific
- * error than "couldn't read pg_control".
- */
- if (sizeof(ControlFileData) > PG_CONTROL_SIZE)
- {
- fprintf(stderr,
- _("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
- progname);
- exit(1);
- }
-
- memset(buffer, 0, PG_CONTROL_SIZE);
- memcpy(buffer, &ControlFile, sizeof(ControlFileData));
-
- unlink(XLOG_CONTROL_FILE);
-
- fd = open(XLOG_CONTROL_FILE,
- O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
- S_IRUSR | S_IWUSR);
- if (fd < 0)
- {
- fprintf(stderr, _("%s: could not create pg_control file: %s\n"),
- progname, strerror(errno));
- exit(1);
- }
-
- errno = 0;
- if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE)
- {
- /* if write didn't set errno, assume problem is no disk space */
- if (errno == 0)
- errno = ENOSPC;
- fprintf(stderr, _("%s: could not write pg_control file: %s\n"),
- progname, strerror(errno));
- exit(1);
- }
-
- if (fsync(fd) != 0)
- {
- fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
- exit(1);
- }
-
- close(fd);
-}
-
-
-/*
- * Scan existing XLOG files and determine the highest existing WAL address
- *
- * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
- * are assumed valid (note that we allow the old xlog seg size to differ
- * from what we're using). On exit, newXlogSegNo is set to a
- * suitable value for the beginning of replacement WAL (in our seg size).
- */
-static void
-FindEndOfXLOG(void)
-{
- DIR *xldir;
- struct dirent *xlde;
- uint64 segs_per_xlogid;
- uint64 xlogbytepos;
-
- /*
- * Initialize the max() computation using the last checkpoint address from
- * old pg_control. Note that for the moment we are working with segment
- * numbering according to the old xlog seg size.
- */
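-	/* one historical "xlogid" spans 2^32 bytes of WAL */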
- segs_per_xlogid = (UINT64CONST(0x0000000100000000) / ControlFile.xlog_seg_size);
- newXlogSegNo = ControlFile.checkPointCopy.redo / ControlFile.xlog_seg_size;
-
- /*
- * Scan the pg_wal directory to find existing WAL segment files. We
- * assume any present have been used; in most scenarios this should be
- * conservative, because of xlog.c's attempts to pre-create files.
- */
- xldir = opendir(XLOGDIR);
- if (xldir == NULL)
- {
- fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
- progname, XLOGDIR, strerror(errno));
- exit(1);
- }
-
- while (errno = 0, (xlde = readdir(xldir)) != NULL)
- {
- if (IsXLogFileName(xlde->d_name) ||
- IsPartialXLogFileName(xlde->d_name))
- {
- unsigned int tli,
- log,
- seg;
- XLogSegNo segno;
-
- /*
- * Note: We don't use XLogFromFileName here, because we want to
- * use the segment size from the control file, not the size the
- * pg_resetxlog binary was compiled with
- */
- sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
- segno = ((uint64) log) * segs_per_xlogid + seg;
-
- /*
- * Note: we take the max of all files found, regardless of their
- * timelines. Another possibility would be to ignore files of
- * timelines other than the target TLI, but this seems safer.
- * Better too large a result than too small...
- */
- if (segno > newXlogSegNo)
- newXlogSegNo = segno;
- }
- }
-
- if (errno)
- {
- fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
- progname, XLOGDIR, strerror(errno));
- exit(1);
- }
-
- if (closedir(xldir))
- {
- fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
- progname, XLOGDIR, strerror(errno));
- exit(1);
- }
-
- /*
- * Finally, convert to new xlog seg size, and advance by one to ensure we
- * are in virgin territory.
- */
- xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
- newXlogSegNo = (xlogbytepos + XLogSegSize - 1) / XLogSegSize;
- newXlogSegNo++;
-}
-
-
-/*
- * Remove existing XLOG files
- */
-static void
-KillExistingXLOG(void)
-{
- DIR *xldir;
- struct dirent *xlde;
- char path[MAXPGPATH];
-
- xldir = opendir(XLOGDIR);
- if (xldir == NULL)
- {
- fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
- progname, XLOGDIR, strerror(errno));
- exit(1);
- }
-
- while (errno = 0, (xlde = readdir(xldir)) != NULL)
- {
- if (IsXLogFileName(xlde->d_name) ||
- IsPartialXLogFileName(xlde->d_name))
- {
- snprintf(path, MAXPGPATH, "%s/%s", XLOGDIR, xlde->d_name);
- if (unlink(path) < 0)
- {
- fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
- progname, path, strerror(errno));
- exit(1);
- }
- }
- }
-
- if (errno)
- {
- fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
- progname, XLOGDIR, strerror(errno));
- exit(1);
- }
-
- if (closedir(xldir))
- {
- fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
- progname, XLOGDIR, strerror(errno));
- exit(1);
- }
-}
-
-
-/*
- * Remove existing archive status files
- */
-static void
-KillExistingArchiveStatus(void)
-{
- DIR *xldir;
- struct dirent *xlde;
- char path[MAXPGPATH];
-
-#define ARCHSTATDIR XLOGDIR "/archive_status"
-
- xldir = opendir(ARCHSTATDIR);
- if (xldir == NULL)
- {
- fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
- progname, ARCHSTATDIR, strerror(errno));
- exit(1);
- }
-
- while (errno = 0, (xlde = readdir(xldir)) != NULL)
- {
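-		/* match status files named after WAL segments: .ready/.done, plus their .partial variants */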
- if (strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_FNAME_LEN &&
- (strcmp(xlde->d_name + XLOG_FNAME_LEN, ".ready") == 0 ||
- strcmp(xlde->d_name + XLOG_FNAME_LEN, ".done") == 0 ||
- strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.ready") == 0 ||
- strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.done") == 0))
- {
- snprintf(path, MAXPGPATH, "%s/%s", ARCHSTATDIR, xlde->d_name);
- if (unlink(path) < 0)
- {
- fprintf(stderr, _("%s: could not delete file \"%s\": %s\n"),
- progname, path, strerror(errno));
- exit(1);
- }
- }
- }
-
- if (errno)
- {
- fprintf(stderr, _("%s: could not read directory \"%s\": %s\n"),
- progname, ARCHSTATDIR, strerror(errno));
- exit(1);
- }
-
- if (closedir(xldir))
- {
- fprintf(stderr, _("%s: could not close directory \"%s\": %s\n"),
- progname, ARCHSTATDIR, strerror(errno));
- exit(1);
- }
-}
-
-
-/*
- * Write an empty XLOG file, containing only the checkpoint record
- * already set up in ControlFile.
- */
-static void
-WriteEmptyXLOG(void)
-{
- char *buffer;
- XLogPageHeader page;
- XLogLongPageHeader longpage;
- XLogRecord *record;
- pg_crc32c crc;
- char path[MAXPGPATH];
- int fd;
- int nbytes;
- char *recptr;
-
- /* Use malloc() to ensure buffer is MAXALIGNED */
- buffer = (char *) pg_malloc(XLOG_BLCKSZ);
- page = (XLogPageHeader) buffer;
- memset(buffer, 0, XLOG_BLCKSZ);
-
- /* Set up the XLOG page header */
- page->xlp_magic = XLOG_PAGE_MAGIC;
- page->xlp_info = XLP_LONG_HEADER;
- page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
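-	/* redo points just past the long page header, so back up to the page boundary */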
- page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD;
- longpage = (XLogLongPageHeader) page;
- longpage->xlp_sysid = ControlFile.system_identifier;
- longpage->xlp_seg_size = XLogSegSize;
- longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
-
- /* Insert the initial checkpoint record */
- recptr = (char *) page + SizeOfXLogLongPHD;
- record = (XLogRecord *) recptr;
- record->xl_prev = 0;
- record->xl_xid = InvalidTransactionId;
- record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
- record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
- record->xl_rmid = RM_XLOG_ID;
-
- recptr += SizeOfXLogRecord;
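-	/* short data header: one byte of block ID, one byte of payload length */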
- *(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
- *(recptr++) = sizeof(CheckPoint);
- memcpy(recptr, &ControlFile.checkPointCopy,
- sizeof(CheckPoint));
-
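-	/* compute the record CRC: payload first, then the header up to xl_crc */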
- INIT_CRC32C(crc);
- COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
- COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
- FIN_CRC32C(crc);
- record->xl_crc = crc;
-
- /* Write the first page */
- XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID, newXlogSegNo);
-
- unlink(path);
-
- fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
- S_IRUSR | S_IWUSR);
- if (fd < 0)
- {
- fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
- progname, path, strerror(errno));
- exit(1);
- }
-
- errno = 0;
- if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
- {
- /* if write didn't set errno, assume problem is no disk space */
- if (errno == 0)
- errno = ENOSPC;
- fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
- progname, path, strerror(errno));
- exit(1);
- }
-
- /* Fill the rest of the file with zeroes */
- memset(buffer, 0, XLOG_BLCKSZ);
- for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
- {
- errno = 0;
- if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
- {
- if (errno == 0)
- errno = ENOSPC;
- fprintf(stderr, _("%s: could not write file \"%s\": %s\n"),
- progname, path, strerror(errno));
- exit(1);
- }
- }
-
- if (fsync(fd) != 0)
- {
- fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno));
- exit(1);
- }
-
- close(fd);
-}
-
-
-static void
-usage(void)
-{
- printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
- printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
- printf(_("Options:\n"));
- printf(_(" -c XID,XID set oldest and newest transactions bearing commit timestamp\n"));
- printf(_(" (zero in either value means no change)\n"));
- printf(_(" [-D] DATADIR data directory\n"));
- printf(_(" -e XIDEPOCH set next transaction ID epoch\n"));
- printf(_(" -f force update to be done\n"));
- printf(_(" -l XLOGFILE force minimum WAL starting location for new transaction log\n"));
- printf(_(" -m MXID,MXID set next and oldest multitransaction ID\n"));
- printf(_(" -n no update, just show what would be done (for testing)\n"));
- printf(_(" -o OID set next OID\n"));
- printf(_(" -O OFFSET set next multitransaction offset\n"));
- printf(_(" -V, --version output version information, then exit\n"));
- printf(_(" -x XID set next transaction ID\n"));
- printf(_(" -?, --help show this help, then exit\n"));
- printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
-}
diff --git a/src/bin/pg_resetxlog/po/cs.po b/src/bin/pg_resetxlog/po/cs.po
deleted file mode 100644
index 52ddcacc82..0000000000
--- a/src/bin/pg_resetxlog/po/cs.po
+++ /dev/null
@@ -1,498 +0,0 @@
-# Czech message translation file for pg_resetxlog
-# Copyright (C) 2012 PostgreSQL Global Development Group
-# This file is distributed under the same license as the PostgreSQL package.
-#
-# Tomas Vondra , 2012, 2013.
-msgid ""
-msgstr ""
-"Project-Id-Version: pg_resetxlog-cs (PostgreSQL 9.3)\n"
-"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
-"POT-Creation-Date: 2013-09-23 20:18+0000\n"
-"PO-Revision-Date: 2013-12-01 20:46-0500\n"
-"Last-Translator: Tomas Vondra \n"
-"Language-Team: Czech \n"
-"Language: cs\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n"
-"X-Generator: Lokalize 1.5\n"
-
-#: pg_resetxlog.c:133
-#, c-format
-msgid "%s: invalid argument for option -e\n"
-msgstr "%s: neplatný argument pro volbu -e\n"
-
-#: pg_resetxlog.c:134 pg_resetxlog.c:149 pg_resetxlog.c:164 pg_resetxlog.c:179
-#: pg_resetxlog.c:187 pg_resetxlog.c:213 pg_resetxlog.c:227 pg_resetxlog.c:234
-#: pg_resetxlog.c:242
-#, c-format
-msgid "Try \"%s --help\" for more information.\n"
-msgstr "Zkuste \"%s --help\" pro více informací.\n"
-
-#: pg_resetxlog.c:139
-#, c-format
-msgid "%s: transaction ID epoch (-e) must not be -1\n"
-msgstr "%s: epocha ID transakce (-e) nesmí být -1\n"
-
-#: pg_resetxlog.c:148
-#, c-format
-msgid "%s: invalid argument for option -x\n"
-msgstr "%s: neplatný argument pro volbu -x\n"
-
-#: pg_resetxlog.c:154
-#, c-format
-msgid "%s: transaction ID (-x) must not be 0\n"
-msgstr "%s: ID transakce (-x) nesmí být 0\n"
-
-#: pg_resetxlog.c:163
-#, c-format
-msgid "%s: invalid argument for option -o\n"
-msgstr "%s: neplatný argument pro volbu -o\n"
-
-#: pg_resetxlog.c:169
-#, c-format
-msgid "%s: OID (-o) must not be 0\n"
-msgstr "%s: OID (-o) nesmí být 0\n"
-
-#: pg_resetxlog.c:178 pg_resetxlog.c:186
-#, c-format
-msgid "%s: invalid argument for option -m\n"
-msgstr "%s: neplatný argument pro volbu -m\n"
-
-#: pg_resetxlog.c:192
-#, c-format
-msgid "%s: multitransaction ID (-m) must not be 0\n"
-msgstr "%s: ID transakce (-m) nesmí být 0\n"
-
-#: pg_resetxlog.c:202
-#, c-format
-msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
-msgstr "%s: ID nejstarší multitransakce (-m) nesmí být 0\n"
-
-#: pg_resetxlog.c:212
-#, c-format
-msgid "%s: invalid argument for option -O\n"
-msgstr "%s: neplatný argument pro volbu -O\n"
-
-#: pg_resetxlog.c:218
-#, c-format
-msgid "%s: multitransaction offset (-O) must not be -1\n"
-msgstr "%s: ID transakce (-O) nesmí být -1\n"
-
-#: pg_resetxlog.c:226
-#, c-format
-msgid "%s: invalid argument for option -l\n"
-msgstr "%s: neplatný argument pro volbu -l\n"
-
-#: pg_resetxlog.c:241
-#, c-format
-msgid "%s: no data directory specified\n"
-msgstr "%s: není specifikován datový adresář\n"
-
-#: pg_resetxlog.c:255
-#, c-format
-msgid "%s: cannot be executed by \"root\"\n"
-msgstr "%s: nemůže být spuštěn uživatelem \"root\"\n"
-
-#: pg_resetxlog.c:257
-#, c-format
-msgid "You must run %s as the PostgreSQL superuser.\n"
-msgstr "Musíte spustit %s jako PostgreSQL superuživatel.\n"
-
-#: pg_resetxlog.c:267
-#, c-format
-msgid "%s: could not change directory to \"%s\": %s\n"
-msgstr "%s: nelze změnit adresář na \"%s\": %s\n"
-
-#: pg_resetxlog.c:280 pg_resetxlog.c:414
-#, c-format
-msgid "%s: could not open file \"%s\" for reading: %s\n"
-msgstr "%s: nelze otevřít soubor \"%s\" pro čtení: %s\n"
-
-#: pg_resetxlog.c:287
-#, c-format
-msgid ""
-"%s: lock file \"%s\" exists\n"
-"Is a server running? If not, delete the lock file and try again.\n"
-msgstr ""
-"%s: soubor se zámkem \"%s\" existuje\n"
-"Neběží již server? Jestliže ne, smažte soubor se zámkem a zkuste to znova.\n"
-
-#: pg_resetxlog.c:362
-#, c-format
-msgid ""
-"\n"
-"If these values seem acceptable, use -f to force reset.\n"
-msgstr ""
-"\n"
-"Jestliže tyto hodnoty vypadají akceptovatelně, použijte -f pro vynucený "
-"reset.\n"
-
-#: pg_resetxlog.c:374
-#, c-format
-msgid ""
-"The database server was not shut down cleanly.\n"
-"Resetting the transaction log might cause data to be lost.\n"
-"If you want to proceed anyway, use -f to force reset.\n"
-msgstr ""
-"Databázový server nebyl ukončen standardně.\n"
-"Resetování transakčního logu může způsobit ztrátu dat.\n"
-"Jestliže i přesto chcete pokračovat, použijte -f pro vynucený reset.\n"
-
-#: pg_resetxlog.c:388
-#, c-format
-msgid "Transaction log reset\n"
-msgstr "Transakční log resetován\n"
-
-#: pg_resetxlog.c:417
-#, c-format
-msgid ""
-"If you are sure the data directory path is correct, execute\n"
-" touch %s\n"
-"and try again.\n"
-msgstr ""
-"Máte-li jistotu, že je cesta k datovému adresáři správná, proveďte\n"
-" touch %s\n"
-"a zkuste to znovu.\n"
-
-#: pg_resetxlog.c:430
-#, c-format
-msgid "%s: could not read file \"%s\": %s\n"
-msgstr "%s: nelze číst soubor \"%s\": %s\n"
-
-#: pg_resetxlog.c:453
-#, c-format
-msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
-msgstr ""
-"%s: pg_control existuje, ale s neplatným kontrolním součtem CRC; postupujte "
-"opatrně\n"
-
-#: pg_resetxlog.c:462
-#, c-format
-msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
-msgstr ""
-"%s: pg_control existuje, ale je poškozen nebo neznámé verze; ignoruji to\n"
-
-#: pg_resetxlog.c:561
-#, c-format
-msgid ""
-"Guessed pg_control values:\n"
-"\n"
-msgstr ""
-"Odhadnuté hodnoty pg_controlu:\n"
-"\n"
-
-#: pg_resetxlog.c:563
-#, c-format
-msgid ""
-"pg_control values:\n"
-"\n"
-msgstr ""
-"Hodnoty pg_controlu:\n"
-"\n"
-
-#: pg_resetxlog.c:574
-#, c-format
-msgid "First log segment after reset: %s\n"
-msgstr "První log segment po resetu: %s\n"
-
-#: pg_resetxlog.c:576
-#, c-format
-msgid "pg_control version number: %u\n"
-msgstr "číslo verze pg_controlu: %u\n"
-
-#: pg_resetxlog.c:578
-#, c-format
-msgid "Catalog version number: %u\n"
-msgstr "Číslo verze katalogu: %u\n"
-
-#: pg_resetxlog.c:580
-#, c-format
-msgid "Database system identifier: %s\n"
-msgstr "Identifikátor databázového systému: %s\n"
-
-#: pg_resetxlog.c:582
-#, c-format
-msgid "Latest checkpoint's TimeLineID: %u\n"
-msgstr "TimeLineID posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:584
-#, c-format
-msgid "Latest checkpoint's full_page_writes: %s\n"
-msgstr "Poslední full_page_writes checkpointu: %s\n"
-
-#: pg_resetxlog.c:585
-msgid "off"
-msgstr "vypnuto"
-
-#: pg_resetxlog.c:585
-msgid "on"
-msgstr "zapnuto"
-
-#: pg_resetxlog.c:586
-#, c-format
-msgid "Latest checkpoint's NextXID: %u/%u\n"
-msgstr "Poslední umístění NextXID checkpointu: %u/%u\n"
-
-#: pg_resetxlog.c:589
-#, c-format
-msgid "Latest checkpoint's NextOID: %u\n"
-msgstr "Poslední umístění NextOID checkpointu: %u\n"
-
-#: pg_resetxlog.c:591
-#, c-format
-msgid "Latest checkpoint's NextMultiXactId: %u\n"
-msgstr "NextMultiXactId posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:593
-#, c-format
-msgid "Latest checkpoint's NextMultiOffset: %u\n"
-msgstr "NextMultiOffset posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:595
-#, c-format
-msgid "Latest checkpoint's oldestXID: %u\n"
-msgstr "oldestXID posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:597
-#, c-format
-msgid "Latest checkpoint's oldestXID's DB: %u\n"
-msgstr "DB k oldestXID posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:599
-#, c-format
-msgid "Latest checkpoint's oldestActiveXID: %u\n"
-msgstr "oldestActiveXID posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:601
-#, c-format
-msgid "Latest checkpoint's oldestMultiXid: %u\n"
-msgstr "oldestMultiXid posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:603
-#, c-format
-msgid "Latest checkpoint's oldestMulti's DB: %u\n"
-msgstr "oldestMulti's DB posledního checkpointu: %u\n"
-
-#: pg_resetxlog.c:605
-#, c-format
-msgid "Maximum data alignment: %u\n"
-msgstr "Maximální zarovnání dat: %u\n"
-
-#: pg_resetxlog.c:608
-#, c-format
-msgid "Database block size: %u\n"
-msgstr "Velikost databázového bloku: %u\n"
-
-#: pg_resetxlog.c:610
-#, c-format
-msgid "Blocks per segment of large relation: %u\n"
-msgstr "Bloků v segmentu velké relace: %u\n"
-
-#: pg_resetxlog.c:612
-#, c-format
-msgid "WAL block size: %u\n"
-msgstr "Velikost WAL bloku: %u\n"
-
-#: pg_resetxlog.c:614
-#, c-format
-msgid "Bytes per WAL segment: %u\n"
-msgstr "Bytů ve WAL segmentu: %u\n"
-
-#: pg_resetxlog.c:616
-#, c-format
-msgid "Maximum length of identifiers: %u\n"
-msgstr "Maximální délka identifikátorů: %u\n"
-
-#: pg_resetxlog.c:618
-#, c-format
-msgid "Maximum columns in an index: %u\n"
-msgstr "Maximální počet sloupců v indexu: %u\n"
-
-#: pg_resetxlog.c:620
-#, c-format
-msgid "Maximum size of a TOAST chunk: %u\n"
-msgstr "Maximální velikost úseku TOAST: %u\n"
-
-#: pg_resetxlog.c:622
-#, c-format
-msgid "Date/time type storage: %s\n"
-msgstr "Způsob uložení typu date/time: %s\n"
-
-#: pg_resetxlog.c:623
-msgid "64-bit integers"
-msgstr "64-bitová čísla"
-
-#: pg_resetxlog.c:623
-msgid "floating-point numbers"
-msgstr "čísla s plovoucí řádovou čárkou"
-
-#: pg_resetxlog.c:624
-#, c-format
-msgid "Float4 argument passing: %s\n"
-msgstr "Způsob předávání float4 hodnot: %s\n"
-
-#: pg_resetxlog.c:625 pg_resetxlog.c:627
-msgid "by reference"
-msgstr "odkazem"
-
-#: pg_resetxlog.c:625 pg_resetxlog.c:627
-msgid "by value"
-msgstr "hodnotou"
-
-#: pg_resetxlog.c:626
-#, c-format
-msgid "Float8 argument passing: %s\n"
-msgstr "Způsob předávání float8 hodnot: %s\n"
-
-#: pg_resetxlog.c:628
-#, c-format
-#| msgid "Catalog version number: %u\n"
-msgid "Data page checksum version: %u\n"
-msgstr "Verze kontrolních součtů datových stránek: %u\n"
-
-#: pg_resetxlog.c:690
-#, c-format
-msgid ""
-"%s: internal error -- sizeof(ControlFileData) is too large ... fix "
-"PG_CONTROL_SIZE\n"
-msgstr ""
-"%s: interní chyba -- sizeof(ControlFileData) je příliš velký ... opravte "
-"PG_CONTROL_SIZE\n"
-
-#: pg_resetxlog.c:705
-#, c-format
-msgid "%s: could not create pg_control file: %s\n"
-msgstr "%s: nelze vytvořit pg_control soubor: %s\n"
-
-#: pg_resetxlog.c:716
-#, c-format
-msgid "%s: could not write pg_control file: %s\n"
-msgstr "%s: nelze zapsat pg_control soubor: %s\n"
-
-#: pg_resetxlog.c:723 pg_resetxlog.c:1022
-#, c-format
-msgid "%s: fsync error: %s\n"
-msgstr "%s: fsync chyba: %s\n"
-
-#: pg_resetxlog.c:763 pg_resetxlog.c:834 pg_resetxlog.c:890
-#, c-format
-msgid "%s: could not open directory \"%s\": %s\n"
-msgstr "%s: nelze otevřít adresář \"%s\": %s\n"
-
-#: pg_resetxlog.c:805 pg_resetxlog.c:867 pg_resetxlog.c:924
-#, c-format
-msgid "%s: could not read from directory \"%s\": %s\n"
-msgstr "%s: nelze číst z adresáře \"%s\": %s\n"
-
-#: pg_resetxlog.c:848 pg_resetxlog.c:905
-#, c-format
-msgid "%s: could not delete file \"%s\": %s\n"
-msgstr "%s: nelze smazat soubor \"%s\": %s\n"
-
-#: pg_resetxlog.c:989
-#, c-format
-msgid "%s: could not open file \"%s\": %s\n"
-msgstr "%s: nelze otevřít soubor \"%s\": %s\n"
-
-#: pg_resetxlog.c:1000 pg_resetxlog.c:1014
-#, c-format
-msgid "%s: could not write file \"%s\": %s\n"
-msgstr "%s: nelze zapsat do souboru \"%s\": %s\n"
-
-#: pg_resetxlog.c:1033
-#, c-format
-msgid ""
-"%s resets the PostgreSQL transaction log.\n"
-"\n"
-msgstr ""
-"%s resetuje PostgreSQL transakční log.\n"
-"\n"
-
-#: pg_resetxlog.c:1034
-#, c-format
-msgid ""
-"Usage:\n"
-" %s [OPTION]... DATADIR\n"
-"\n"
-msgstr ""
-"Použití:\n"
-" %s [VOLBA]... ADRESÁŘ\n"
-"\n"
-
-#: pg_resetxlog.c:1035
-#, c-format
-msgid "Options:\n"
-msgstr "Přepínače:\n"
-
-#: pg_resetxlog.c:1036
-#, c-format
-msgid " -e XIDEPOCH set next transaction ID epoch\n"
-msgstr " -e XIDEPOCH nastaví epochu následujícího ID transakce\n"
-
-#: pg_resetxlog.c:1037
-#, c-format
-msgid " -f force update to be done\n"
-msgstr " -f vynutí provedení update\n"
-
-#: pg_resetxlog.c:1038
-#, c-format
-msgid ""
-" -l XLOGFILE force minimum WAL starting location for new transaction "
-"log\n"
-msgstr ""
-" -l XLOGFILE vynutí minimální počáteční WAL pozici pro nový transakční "
-"log\n"
-
-#: pg_resetxlog.c:1039
-#, c-format
-#| msgid " -x XID set next transaction ID\n"
-msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
-msgstr " -m MXID,MXID nastav další a nejstarší ID multitransakce\n"
-
-#: pg_resetxlog.c:1040
-#, c-format
-msgid ""
-" -n no update, just show extracted control values (for "
-"testing)\n"
-msgstr ""
-" -n bez změny, jen ukáže získané kontrolní hodnoty (pro "
-"testování)\n"
-
-#: pg_resetxlog.c:1041
-#, c-format
-msgid " -o OID set next OID\n"
-msgstr " -o OID nastaví následující OID\n"
-
-#: pg_resetxlog.c:1042
-#, c-format
-msgid " -O OFFSET set next multitransaction offset\n"
-msgstr " -O OFFSET nastaví offset následující multitransakce\n"
-
-#: pg_resetxlog.c:1043
-#, c-format
-msgid " -V, --version output version information, then exit\n"
-msgstr " -V, --version ukáže informace o verzi a skončí\n"
-
-#: pg_resetxlog.c:1044
-#, c-format
-msgid " -x XID set next transaction ID\n"
-msgstr " -x XID nastaví ID následující transakce\n"
-
-#: pg_resetxlog.c:1045
-#, c-format
-msgid " -?, --help show this help, then exit\n"
-msgstr " -?, --help ukáže tuto nápovědu a skončí\n"
-
-#: pg_resetxlog.c:1046
-#, c-format
-msgid ""
-"\n"
-"Report bugs to .\n"
-msgstr ""
-"\n"
-"Chyby hlaste na adresu .\n"
-
-#~ msgid "First log file ID after reset: %u\n"
-#~ msgstr "První ID log souboru po resetu: %u\n"
diff --git a/src/bin/pg_resetxlog/po/de.po b/src/bin/pg_resetxlog/po/de.po
deleted file mode 100644
index fb6324d1e2..0000000000
--- a/src/bin/pg_resetxlog/po/de.po
+++ /dev/null
@@ -1,605 +0,0 @@
-# German message translation file for pg_resetxlog
-# Peter Eisentraut , 2002 - 2016.
-#
-# Use these quotes: »%s«
-#
-msgid ""
-msgstr ""
-"Project-Id-Version: PostgreSQL 9.6\n"
-"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
-"POT-Creation-Date: 2016-04-12 20:14+0000\n"
-"PO-Revision-Date: 2016-04-12 18:28-0400\n"
-"Last-Translator: Peter Eisentraut \n"
-"Language-Team: German \n"
-"Language: de\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-
-#: ../../common/restricted_token.c:68
-#, c-format
-msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
-msgstr "%s: WARNUNG: auf dieser Plattform können keine beschränkten Token erzeugt werden\n"
-
-#: ../../common/restricted_token.c:77
-#, c-format
-msgid "%s: could not open process token: error code %lu\n"
-msgstr "%s: konnte Prozess-Token nicht öffnen: Fehlercode %lu\n"
-
-#: ../../common/restricted_token.c:90
-#, c-format
-msgid "%s: could not allocate SIDs: error code %lu\n"
-msgstr "%s: konnte SIDs nicht erzeugen: Fehlercode %lu\n"
-
-#: ../../common/restricted_token.c:110
-#, c-format
-msgid "%s: could not create restricted token: error code %lu\n"
-msgstr "%s: konnte beschränktes Token nicht erzeugen: Fehlercode %lu\n"
-
-#: ../../common/restricted_token.c:132
-#, c-format
-msgid "%s: could not start process for command \"%s\": error code %lu\n"
-msgstr "%s: konnte Prozess für Befehl »%s« nicht starten: Fehlercode %lu\n"
-
-#: ../../common/restricted_token.c:170
-#, c-format
-msgid "%s: could not re-execute with restricted token: error code %lu\n"
-msgstr "%s: konnte Prozess nicht mit beschränktem Token neu starten: Fehlercode %lu\n"
-
-#: ../../common/restricted_token.c:186
-#, c-format
-msgid "%s: could not get exit code from subprocess: error code %lu\n"
-msgstr "%s: konnte Statuscode des Subprozesses nicht ermitteln: Fehlercode %lu\n"
-
-#. translator: the second %s is a command line argument (-e, etc)
-#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
-#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
-#: pg_resetxlog.c:264
-#, c-format
-msgid "%s: invalid argument for option %s\n"
-msgstr "%s: ungültiges Argument für Option %s\n"
-
-#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
-#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
-#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
-#, c-format
-msgid "Try \"%s --help\" for more information.\n"
-msgstr "Versuchen Sie »%s --help« für weitere Informationen.\n"
-
-#: pg_resetxlog.c:146
-#, c-format
-msgid "%s: transaction ID epoch (-e) must not be -1\n"
-msgstr "%s: Transaktions-ID-Epoche (-e) darf nicht -1 sein\n"
-
-#: pg_resetxlog.c:161
-#, c-format
-msgid "%s: transaction ID (-x) must not be 0\n"
-msgstr "%s: Transaktions-ID (-x) darf nicht 0 sein\n"
-
-#: pg_resetxlog.c:185 pg_resetxlog.c:192
-#, c-format
-msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
-msgstr "%s: Transaktions-ID (-c) muss entweder 0 oder größer oder gleich 2 sein\n"
-
-#: pg_resetxlog.c:207
-#, c-format
-msgid "%s: OID (-o) must not be 0\n"
-msgstr "%s: OID (-o) darf nicht 0 sein\n"
-
-#: pg_resetxlog.c:230
-#, c-format
-msgid "%s: multitransaction ID (-m) must not be 0\n"
-msgstr "%s: Multitransaktions-ID (-m) darf nicht 0 sein\n"
-
-#: pg_resetxlog.c:240
-#, c-format
-msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
-msgstr "%s: älteste Multitransaktions-ID (-m) darf nicht 0 sein\n"
-
-#: pg_resetxlog.c:256
-#, c-format
-msgid "%s: multitransaction offset (-O) must not be -1\n"
-msgstr "%s: Multitransaktions-Offset (-O) darf nicht -1 sein\n"
-
-#: pg_resetxlog.c:283
-#, c-format
-msgid "%s: too many command-line arguments (first is \"%s\")\n"
-msgstr "%s: zu viele Kommandozeilenargumente (das erste ist »%s«)\n"
-
-#: pg_resetxlog.c:292
-#, c-format
-msgid "%s: no data directory specified\n"
-msgstr "%s: kein Datenverzeichnis angegeben\n"
-
-#: pg_resetxlog.c:306
-#, c-format
-msgid "%s: cannot be executed by \"root\"\n"
-msgstr "%s: kann nicht von »root« ausgeführt werden\n"
-
-#: pg_resetxlog.c:308
-#, c-format
-msgid "You must run %s as the PostgreSQL superuser.\n"
-msgstr "Sie müssen %s als PostgreSQL-Superuser ausführen.\n"
-
-#: pg_resetxlog.c:318
-#, c-format
-msgid "%s: could not change directory to \"%s\": %s\n"
-msgstr "%s: konnte nicht in Verzeichnis »%s« wechseln: %s\n"
-
-#: pg_resetxlog.c:331 pg_resetxlog.c:477
-#, c-format
-msgid "%s: could not open file \"%s\" for reading: %s\n"
-msgstr "%s: konnte Datei »%s« nicht zum Lesen öffnen: %s\n"
-
-#: pg_resetxlog.c:338
-#, c-format
-msgid ""
-"%s: lock file \"%s\" exists\n"
-"Is a server running? If not, delete the lock file and try again.\n"
-msgstr ""
-"%s: Sperrdatei »%s« existiert bereits\n"
-"Läuft der Server? Wenn nicht, dann Sperrdatei löschen und nochmal versuchen.\n"
-
-#: pg_resetxlog.c:425
-#, c-format
-msgid ""
-"\n"
-"If these values seem acceptable, use -f to force reset.\n"
-msgstr ""
-"\n"
-"Wenn diese Werte akzeptabel scheinen, dann benutzen Sie -f um das\n"
-"Zurücksetzen zu erzwingen.\n"
-
-#: pg_resetxlog.c:437
-#, c-format
-msgid ""
-"The database server was not shut down cleanly.\n"
-"Resetting the transaction log might cause data to be lost.\n"
-"If you want to proceed anyway, use -f to force reset.\n"
-msgstr ""
-"Der Datenbankserver wurde nicht sauber heruntergefahren.\n"
-"Beim Zurücksetzen des Transaktionslogs können Daten verloren gehen.\n"
-"Wenn Sie trotzdem weiter machen wollen, benutzen Sie -f, um das\n"
-"Zurücksetzen zu erzwingen.\n"
-
-#: pg_resetxlog.c:451
-#, c-format
-msgid "Transaction log reset\n"
-msgstr "Transaktionslog wurde zurück gesetzt\n"
-
-#: pg_resetxlog.c:480
-#, c-format
-msgid ""
-"If you are sure the data directory path is correct, execute\n"
-" touch %s\n"
-"and try again.\n"
-msgstr ""
-"Wenn Sie sicher sind, dass das Datenverzeichnis korrekt ist, führen Sie\n"
-" touch %s\n"
-"aus und versuchen Sie es erneut.\n"
-
-#: pg_resetxlog.c:493
-#, c-format
-msgid "%s: could not read file \"%s\": %s\n"
-msgstr "%s: konnte Datei »%s« nicht lesen: %s\n"
-
-#: pg_resetxlog.c:516
-#, c-format
-msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
-msgstr "%s: pg_control existiert, aber mit ungültiger CRC; mit Vorsicht fortfahren\n"
-
-#: pg_resetxlog.c:525
-#, c-format
-msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
-msgstr "%s: pg_control existiert, aber ist kaputt oder hat unbekannte Version; wird ignoriert\n"
-
-#: pg_resetxlog.c:628
-#, c-format
-msgid ""
-"Guessed pg_control values:\n"
-"\n"
-msgstr ""
-"Geschätzte pg_control-Werte:\n"
-"\n"
-
-#: pg_resetxlog.c:630
-#, c-format
-msgid ""
-"Current pg_control values:\n"
-"\n"
-msgstr ""
-"Aktuelle pg_control-Werte:\n"
-"\n"
-
-#: pg_resetxlog.c:639
-#, c-format
-msgid "pg_control version number: %u\n"
-msgstr "pg_control-Versionsnummer: %u\n"
-
-#: pg_resetxlog.c:641
-#, c-format
-msgid "Catalog version number: %u\n"
-msgstr "Katalogversionsnummer: %u\n"
-
-#: pg_resetxlog.c:643
-#, c-format
-msgid "Database system identifier: %s\n"
-msgstr "Datenbanksystemidentifikation: %s\n"
-
-#: pg_resetxlog.c:645
-#, c-format
-msgid "Latest checkpoint's TimeLineID: %u\n"
-msgstr "TimeLineID des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:647
-#, c-format
-msgid "Latest checkpoint's full_page_writes: %s\n"
-msgstr "full_page_writes des letzten Checkpoints: %s\n"
-
-#: pg_resetxlog.c:648
-msgid "off"
-msgstr "aus"
-
-#: pg_resetxlog.c:648
-msgid "on"
-msgstr "an"
-
-#: pg_resetxlog.c:649
-#, c-format
-msgid "Latest checkpoint's NextXID: %u:%u\n"
-msgstr "NextXID des letzten Checkpoints: %u:%u\n"
-
-#: pg_resetxlog.c:652
-#, c-format
-msgid "Latest checkpoint's NextOID: %u\n"
-msgstr "NextOID des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:654
-#, c-format
-msgid "Latest checkpoint's NextMultiXactId: %u\n"
-msgstr "NextMultiXactId des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:656
-#, c-format
-msgid "Latest checkpoint's NextMultiOffset: %u\n"
-msgstr "NextMultiOffset des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:658
-#, c-format
-msgid "Latest checkpoint's oldestXID: %u\n"
-msgstr "oldestXID des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:660
-#, c-format
-msgid "Latest checkpoint's oldestXID's DB: %u\n"
-msgstr "DB der oldestXID des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:662
-#, c-format
-msgid "Latest checkpoint's oldestActiveXID: %u\n"
-msgstr "oldestActiveXID des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:664
-#, c-format
-msgid "Latest checkpoint's oldestMultiXid: %u\n"
-msgstr "oldestMultiXid des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:666
-#, c-format
-msgid "Latest checkpoint's oldestMulti's DB: %u\n"
-msgstr "DB des oldestMulti des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:668
-#, c-format
-msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
-msgstr "oldestCommitTsXid des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:670
-#, c-format
-msgid "Latest checkpoint's newestCommitTsXid:%u\n"
-msgstr "newestCommitTsXid des letzten Checkpoints: %u\n"
-
-#: pg_resetxlog.c:672
-#, c-format
-msgid "Maximum data alignment: %u\n"
-msgstr "Maximale Datenausrichtung (Alignment): %u\n"
-
-#: pg_resetxlog.c:675
-#, c-format
-msgid "Database block size: %u\n"
-msgstr "Datenbankblockgröße: %u\n"
-
-#: pg_resetxlog.c:677
-#, c-format
-msgid "Blocks per segment of large relation: %u\n"
-msgstr "Blöcke pro Segment: %u\n"
-
-#: pg_resetxlog.c:679
-#, c-format
-msgid "WAL block size: %u\n"
-msgstr "WAL-Blockgröße: %u\n"
-
-#: pg_resetxlog.c:681
-#, c-format
-msgid "Bytes per WAL segment: %u\n"
-msgstr "Bytes pro WAL-Segment: %u\n"
-
-#: pg_resetxlog.c:683
-#, c-format
-msgid "Maximum length of identifiers: %u\n"
-msgstr "Maximale Bezeichnerlänge: %u\n"
-
-#: pg_resetxlog.c:685
-#, c-format
-msgid "Maximum columns in an index: %u\n"
-msgstr "Maximale Spalten in einem Index: %u\n"
-
-#: pg_resetxlog.c:687
-#, c-format
-msgid "Maximum size of a TOAST chunk: %u\n"
-msgstr "Maximale Größe eines Stücks TOAST: %u\n"
-
-#: pg_resetxlog.c:689
-#, c-format
-msgid "Size of a large-object chunk: %u\n"
-msgstr "Größe eines Large-Object-Chunks: %u\n"
-
-#: pg_resetxlog.c:691
-#, c-format
-msgid "Date/time type storage: %s\n"
-msgstr "Speicherung von Datum/Zeit-Typen: %s\n"
-
-#: pg_resetxlog.c:692
-msgid "64-bit integers"
-msgstr "64-Bit-Ganzzahlen"
-
-#: pg_resetxlog.c:692
-msgid "floating-point numbers"
-msgstr "Gleitkommazahlen"
-
-#: pg_resetxlog.c:693
-#, c-format
-msgid "Float4 argument passing: %s\n"
-msgstr "Übergabe von Float4-Argumenten: %s\n"
-
-#: pg_resetxlog.c:694 pg_resetxlog.c:696
-msgid "by reference"
-msgstr "Referenz"
-
-#: pg_resetxlog.c:694 pg_resetxlog.c:696
-msgid "by value"
-msgstr "Wert"
-
-#: pg_resetxlog.c:695
-#, c-format
-msgid "Float8 argument passing: %s\n"
-msgstr "Übergabe von Float8-Argumenten: %s\n"
-
-#: pg_resetxlog.c:697
-#, c-format
-msgid "Data page checksum version: %u\n"
-msgstr "Datenseitenprüfsummenversion: %u\n"
-
-#: pg_resetxlog.c:711
-#, c-format
-msgid ""
-"\n"
-"\n"
-"Values to be changed:\n"
-"\n"
-msgstr ""
-"\n"
-"\n"
-"Zu ändernde Werte:\n"
-"\n"
-
-#: pg_resetxlog.c:714
-#, c-format
-msgid "First log segment after reset: %s\n"
-msgstr "Erstes Logdateisegment nach Zurücksetzen: %s\n"
-
-#: pg_resetxlog.c:718
-#, c-format
-msgid "NextMultiXactId: %u\n"
-msgstr "NextMultiXactId: %u\n"
-
-#: pg_resetxlog.c:720
-#, c-format
-msgid "OldestMultiXid: %u\n"
-msgstr "OldestMultiXid: %u\n"
-
-#: pg_resetxlog.c:722
-#, c-format
-msgid "OldestMulti's DB: %u\n"
-msgstr "OldestMulti's DB: %u\n"
-
-#: pg_resetxlog.c:728
-#, c-format
-msgid "NextMultiOffset: %u\n"
-msgstr "NextMultiOffset: %u\n"
-
-#: pg_resetxlog.c:734
-#, c-format
-msgid "NextOID: %u\n"
-msgstr "NextOID: %u\n"
-
-#: pg_resetxlog.c:740
-#, c-format
-msgid "NextXID: %u\n"
-msgstr "NextXID: %u\n"
-
-#: pg_resetxlog.c:742
-#, c-format
-msgid "OldestXID: %u\n"
-msgstr "OldestXID: %u\n"
-
-#: pg_resetxlog.c:744
-#, c-format
-msgid "OldestXID's DB: %u\n"
-msgstr "OldestXID's DB: %u\n"
-
-#: pg_resetxlog.c:750
-#, c-format
-msgid "NextXID epoch: %u\n"
-msgstr "NextXID-Epoche: %u\n"
-
-#: pg_resetxlog.c:756
-#, c-format
-msgid "oldestCommitTsXid: %u\n"
-msgstr "oldestCommitTsXid: %u\n"
-
-#: pg_resetxlog.c:761
-#, c-format
-msgid "newestCommitTsXid: %u\n"
-msgstr "newestCommitTsXid: %u\n"
-
-#: pg_resetxlog.c:827
-#, c-format
-msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
-msgstr "%s: interner Fehler -- sizeof(ControlFileData) ist zu groß ... PG_CONTROL_SIZE reparieren\n"
-
-#: pg_resetxlog.c:842
-#, c-format
-msgid "%s: could not create pg_control file: %s\n"
-msgstr "%s: konnte pg_control-Datei nicht erstellen: %s\n"
-
-#: pg_resetxlog.c:853
-#, c-format
-msgid "%s: could not write pg_control file: %s\n"
-msgstr "%sL konnte pg_control-Datei nicht schreiben: %s\n"
-
-#: pg_resetxlog.c:860 pg_resetxlog.c:1156
-#, c-format
-msgid "%s: fsync error: %s\n"
-msgstr "%s: fsync-Fehler: %s\n"
-
-#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
-#, c-format
-msgid "%s: could not open directory \"%s\": %s\n"
-msgstr "%s: konnte Verzeichnis »%s« nicht öffnen: %s\n"
-
-#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
-#, c-format
-msgid "%s: could not read directory \"%s\": %s\n"
-msgstr "%s: konnte Verzeichnis »%s« nicht lesen: %s\n"
-
-#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
-#, c-format
-msgid "%s: could not close directory \"%s\": %s\n"
-msgstr "%s: konnte Verzeichnis »%s« nicht schließen: %s\n"
-
-#: pg_resetxlog.c:984 pg_resetxlog.c:1038
-#, c-format
-msgid "%s: could not delete file \"%s\": %s\n"
-msgstr "%s: konnte Datei »%s« nicht löschen: %s\n"
-
-#: pg_resetxlog.c:1123
-#, c-format
-msgid "%s: could not open file \"%s\": %s\n"
-msgstr "%s: konnte Datei »%s« nicht öffnen: %s\n"
-
-#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
-#, c-format
-msgid "%s: could not write file \"%s\": %s\n"
-msgstr "%s: konnte Datei »%s« nicht schreiben: %s\n"
-
-#: pg_resetxlog.c:1167
-#, c-format
-msgid ""
-"%s resets the PostgreSQL transaction log.\n"
-"\n"
-msgstr ""
-"%s setzt den PostgreSQL-Transaktionslog zurück.\n"
-"\n"
-
-#: pg_resetxlog.c:1168
-#, c-format
-msgid ""
-"Usage:\n"
-" %s [OPTION]... DATADIR\n"
-"\n"
-msgstr ""
-"Aufruf:\n"
-" %s [OPTION]... DATENVERZEICHNIS\n"
-"\n"
-
-#: pg_resetxlog.c:1169
-#, c-format
-msgid "Options:\n"
-msgstr "Optionen:\n"
-
-#: pg_resetxlog.c:1170
-#, c-format
-msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
-msgstr " -c XID,XID älteste und neuste Transaktion mit Commit-Timestamp setzen\n"
-
-#: pg_resetxlog.c:1171
-#, c-format
-msgid " (zero in either value means no change)\n"
-msgstr " (Null in einem Wert bedeutet keine Änderung)\n"
-
-#: pg_resetxlog.c:1172
-#, c-format
-msgid " [-D] DATADIR data directory\n"
-msgstr " [-D] DATENVERZ Datenbankverzeichnis\n"
-
-#: pg_resetxlog.c:1173
-#, c-format
-msgid " -e XIDEPOCH set next transaction ID epoch\n"
-msgstr " -e XIDEPOCHE nächste Transaktions-ID-Epoche setzen\n"
-
-#: pg_resetxlog.c:1174
-#, c-format
-msgid " -f force update to be done\n"
-msgstr " -f Änderung erzwingen\n"
-
-#: pg_resetxlog.c:1175
-#, c-format
-msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
-msgstr " -l XLOGDATEI minimale WAL-Startposition für neuen Log erzwingen\n"
-
-#: pg_resetxlog.c:1176
-#, c-format
-msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
-msgstr " -m MXID,MXID nächste und älteste Multitransaktions-ID setzen\n"
-
-#: pg_resetxlog.c:1177
-#, c-format
-msgid " -n no update, just show what would be done (for testing)\n"
-msgstr ""
-" -n keine Änderungen; nur zeigen, was gemacht werden würde (zum\n"
-" Testen)\n"
-
-#: pg_resetxlog.c:1178
-#, c-format
-msgid " -o OID set next OID\n"
-msgstr " -o OID nächste OID setzen\n"
-
-#: pg_resetxlog.c:1179
-#, c-format
-msgid " -O OFFSET set next multitransaction offset\n"
-msgstr " -O OFFSET nächsten Multitransaktions-Offset setzen\n"
-
-#: pg_resetxlog.c:1180
-#, c-format
-msgid " -V, --version output version information, then exit\n"
-msgstr " -V, --version Versionsinformationen anzeigen, dann beenden\n"
-
-#: pg_resetxlog.c:1181
-#, c-format
-msgid " -x XID set next transaction ID\n"
-msgstr " -x XID nächste Transaktions-ID setzen\n"
-
-#: pg_resetxlog.c:1182
-#, c-format
-msgid " -?, --help show this help, then exit\n"
-msgstr " -?, --help diese Hilfe anzeigen, dann beenden\n"
-
-#: pg_resetxlog.c:1183
-#, c-format
-msgid ""
-"\n"
-"Report bugs to .\n"
-msgstr ""
-"\n"
-"Berichten Sie Fehler an .\n"
diff --git a/src/bin/pg_resetxlog/po/es.po b/src/bin/pg_resetxlog/po/es.po
deleted file mode 100644
index fec8ea973c..0000000000
--- a/src/bin/pg_resetxlog/po/es.po
+++ /dev/null
@@ -1,611 +0,0 @@
-# Spanish message translation file for pg_resetxlog
-#
-# Copyright (C) 2003-2012 PostgreSQL Global Development Group
-# This file is distributed under the same license as the PostgreSQL package.
-#
-# Ivan Hernandez , 2003.
-# Alvaro Herrera , 2004-2014
-# Jaime Casanova , 2005
-# Martín Marqués , 2013-2014
-#
-msgid ""
-msgstr ""
-"Project-Id-Version: pg_resetxlog (PostgreSQL 9.6)\n"
-"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
-"POT-Creation-Date: 2016-05-02 20:14+0000\n"
-"PO-Revision-Date: 2016-05-24 10:56-0500\n"
-"Last-Translator: Carlos Chapi \n"
-"Language-Team: Español \n"
-"Language: es\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=UTF-8\n"
-"Content-Transfer-Encoding: 8bit\n"
-"X-Generator: Poedit 1.8.7\n"
-
-#: ../../common/restricted_token.c:68
-#, c-format
-msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
-msgstr "%s: ATENCIÓN: no se pueden crear tokens restrigidos en esta plataforma\n"
-
-#: ../../common/restricted_token.c:77
-#, c-format
-msgid "%s: could not open process token: error code %lu\n"
-msgstr "%s: no se pudo abrir el token de proceso: código de error %lu\n"
-
-#: ../../common/restricted_token.c:90
-#, c-format
-msgid "%s: could not allocate SIDs: error code %lu\n"
-msgstr "%s: no se pudo emplazar los SIDs: código de error %lu\n"
-
-#: ../../common/restricted_token.c:110
-#, c-format
-msgid "%s: could not create restricted token: error code %lu\n"
-msgstr "%s: no se pudo crear el token restringido: código de error %lu\n"
-
-#: ../../common/restricted_token.c:132
-#, c-format
-msgid "%s: could not start process for command \"%s\": error code %lu\n"
-msgstr "%s: no se pudo iniciar el proceso para la orden «%s»: código de error %lu\n"
-
-#: ../../common/restricted_token.c:170
-#, c-format
-msgid "%s: could not re-execute with restricted token: error code %lu\n"
-msgstr "%s: no se pudo re-ejecutar con el token restringido: código de error %lu\n"
-
-#: ../../common/restricted_token.c:186
-#, c-format
-msgid "%s: could not get exit code from subprocess: error code %lu\n"
-msgstr "%s: no se pudo obtener el código de salida del subproceso»: código de error %lu\n"
-
-#. translator: the second %s is a command line argument (-e, etc)
-#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
-#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
-#: pg_resetxlog.c:264
-#, c-format
-msgid "%s: invalid argument for option %s\n"
-msgstr "%s: argumento no válido para la opción %s\n"
-
-#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
-#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
-#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
-#, c-format
-msgid "Try \"%s --help\" for more information.\n"
-msgstr "Prueba con «%s --help» para más información\n"
-
-#: pg_resetxlog.c:146
-#, c-format
-msgid "%s: transaction ID epoch (-e) must not be -1\n"
-msgstr "%s: el «epoch» de ID de transacción (-e) no debe ser -1\n"
-
-#: pg_resetxlog.c:161
-#, c-format
-msgid "%s: transaction ID (-x) must not be 0\n"
-msgstr "%s: el ID de transacción (-x) no debe ser 0\n"
-
-#: pg_resetxlog.c:185 pg_resetxlog.c:192
-#, c-format
-msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
-msgstr "%s: el ID de transacción (-c) debe ser 0 o bien mayor o igual a 2\n"
-
-#: pg_resetxlog.c:207
-#, c-format
-msgid "%s: OID (-o) must not be 0\n"
-msgstr "%s: OID (-o) no debe ser cero\n"
-
-#: pg_resetxlog.c:230
-#, c-format
-msgid "%s: multitransaction ID (-m) must not be 0\n"
-msgstr "%s: el ID de multitransacción (-m) no debe ser 0\n"
-
-#: pg_resetxlog.c:240
-#, c-format
-msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
-msgstr "%s: el ID de multitransacción más antiguo (-m) no debe ser 0\n"
-
-#: pg_resetxlog.c:256
-#, c-format
-msgid "%s: multitransaction offset (-O) must not be -1\n"
-msgstr "%s: la posición de multitransacción (-O) no debe ser -1\n"
-
-#: pg_resetxlog.c:283
-#, c-format
-msgid "%s: too many command-line arguments (first is \"%s\")\n"
-msgstr "%s: demasiados argumentos de línea de órdenes (el primero es «%s»)\n"
-
-#: pg_resetxlog.c:292
-#, c-format
-msgid "%s: no data directory specified\n"
-msgstr "%s: directorio de datos no especificado\n"
-
-#: pg_resetxlog.c:306
-#, c-format
-msgid "%s: cannot be executed by \"root\"\n"
-msgstr "%s: no puede ser ejecutado con el usuario «root»\n"
-
-#: pg_resetxlog.c:308
-#, c-format
-msgid "You must run %s as the PostgreSQL superuser.\n"
-msgstr "Debe ejecutar %s con el superusuario de PostgreSQL.\n"
-
-#: pg_resetxlog.c:318
-#, c-format
-msgid "%s: could not change directory to \"%s\": %s\n"
-msgstr "%s: no se pudo cambiar al directorio «%s»: %s\n"
-
-#: pg_resetxlog.c:331 pg_resetxlog.c:477
-#, c-format
-msgid "%s: could not open file \"%s\" for reading: %s\n"
-msgstr "%s: no se pudo abrir el archivo «%s» para lectura: %s\n"
-
-#: pg_resetxlog.c:338
-#, c-format
-msgid ""
-"%s: lock file \"%s\" exists\n"
-"Is a server running? If not, delete the lock file and try again.\n"
-msgstr ""
-"%s: el archivo candado «%s» existe\n"
-"¿Hay un servidor corriendo? Si no, borre el archivo candado e inténtelo de nuevo\n"
-
-#: pg_resetxlog.c:425
-#, c-format
-msgid ""
-"\n"
-"If these values seem acceptable, use -f to force reset.\n"
-msgstr ""
-"\n"
-"Si estos valores parecen aceptables, use -f para forzar reinicio.\n"
-
-#: pg_resetxlog.c:437
-#, c-format
-msgid ""
-"The database server was not shut down cleanly.\n"
-"Resetting the transaction log might cause data to be lost.\n"
-"If you want to proceed anyway, use -f to force reset.\n"
-msgstr ""
-"El servidor de base de datos no fue terminado limpiamente.\n"
-"Reiniciar la bitácora de transacciones puede causar pérdida de datos.\n"
-"Si de todas formas quiere proceder, use -f para forzar su reinicio.\n"
-
-#: pg_resetxlog.c:451
-#, c-format
-msgid "Transaction log reset\n"
-msgstr "Bitácora de transacciones reiniciada\n"
-
-#: pg_resetxlog.c:480
-#, c-format
-msgid ""
-"If you are sure the data directory path is correct, execute\n"
-" touch %s\n"
-"and try again.\n"
-msgstr ""
-"Si está seguro que la ruta al directorio de datos es correcta, ejecute\n"
-" touch %s\n"
-"y pruebe de nuevo.\n"
-
-#: pg_resetxlog.c:493
-#, c-format
-msgid "%s: could not read file \"%s\": %s\n"
-msgstr "%s: no se pudo leer el archivo «%s»: %s\n"
-
-#: pg_resetxlog.c:516
-#, c-format
-msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
-msgstr "%s: existe pg_control pero tiene un CRC no válido, proceda con precaución\n"
-
-#: pg_resetxlog.c:525
-#, c-format
-msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
-msgstr "%s: existe pg_control pero está roto o se desconoce su versión; ignorándolo\n"
-
-#: pg_resetxlog.c:628
-#, c-format
-msgid ""
-"Guessed pg_control values:\n"
-"\n"
-msgstr ""
-"Valores de pg_control asumidos:\n"
-"\n"
-
-#: pg_resetxlog.c:630
-#, c-format
-msgid ""
-"Current pg_control values:\n"
-"\n"
-msgstr ""
-"Valores actuales de pg_control:\n"
-"\n"
-
-#: pg_resetxlog.c:639
-#, c-format
-msgid "pg_control version number: %u\n"
-msgstr "Número de versión de pg_control: %u\n"
-
-#: pg_resetxlog.c:641
-#, c-format
-msgid "Catalog version number: %u\n"
-msgstr "Número de versión de catálogo: %u\n"
-
-#: pg_resetxlog.c:643
-#, c-format
-msgid "Database system identifier: %s\n"
-msgstr "Identificador de sistema: %s\n"
-
-#: pg_resetxlog.c:645
-#, c-format
-msgid "Latest checkpoint's TimeLineID: %u\n"
-msgstr "TimeLineID del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:647
-#, c-format
-msgid "Latest checkpoint's full_page_writes: %s\n"
-msgstr "full_page_writes del checkpoint más reciente: %s\n"
-
-#: pg_resetxlog.c:648
-msgid "off"
-msgstr "desactivado"
-
-#: pg_resetxlog.c:648
-msgid "on"
-msgstr "activado"
-
-#: pg_resetxlog.c:649
-#, c-format
-msgid "Latest checkpoint's NextXID: %u:%u\n"
-msgstr "NextXID del checkpoint más reciente: %u:%u\n"
-
-#: pg_resetxlog.c:652
-#, c-format
-msgid "Latest checkpoint's NextOID: %u\n"
-msgstr "NextOID del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:654
-#, c-format
-msgid "Latest checkpoint's NextMultiXactId: %u\n"
-msgstr "NextMultiXactId del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:656
-#, c-format
-msgid "Latest checkpoint's NextMultiOffset: %u\n"
-msgstr "NextMultiOffset del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:658
-#, c-format
-msgid "Latest checkpoint's oldestXID: %u\n"
-msgstr "oldestXID del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:660
-#, c-format
-msgid "Latest checkpoint's oldestXID's DB: %u\n"
-msgstr "BD del oldestXID del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:662
-#, c-format
-msgid "Latest checkpoint's oldestActiveXID: %u\n"
-msgstr "oldestActiveXID del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:664
-#, c-format
-msgid "Latest checkpoint's oldestMultiXid: %u\n"
-msgstr "oldestMultiXid del checkpoint más reciente: %u\n"
-
-#: pg_resetxlog.c:666
-#, c-format
-msgid "Latest checkpoint's oldestMulti's DB: %u\n"
-msgstr "BD del oldestMultiXid del checkpt. más reciente: %u\n"
-
-#: pg_resetxlog.c:668
-#, c-format
-msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
-msgstr "oldestCommitTsXid del último checkpoint: %u\n"
-
-#: pg_resetxlog.c:670
-#, c-format
-msgid "Latest checkpoint's newestCommitTsXid:%u\n"
-msgstr "newestCommitTsXid del último checkpoint: %u\n"
-
-#: pg_resetxlog.c:672
-#, c-format
-msgid "Maximum data alignment: %u\n"
-msgstr "Máximo alineamiento de datos: %u\n"
-
-#: pg_resetxlog.c:675
-#, c-format
-msgid "Database block size: %u\n"
-msgstr "Tamaño del bloque de la base de datos: %u\n"
-
-#: pg_resetxlog.c:677
-#, c-format
-msgid "Blocks per segment of large relation: %u\n"
-msgstr "Bloques por segmento de relación grande: %u\n"
-
-#: pg_resetxlog.c:679
-#, c-format
-msgid "WAL block size: %u\n"
-msgstr "Tamaño del bloque de WAL: %u\n"
-
-#: pg_resetxlog.c:681
-#, c-format
-msgid "Bytes per WAL segment: %u\n"
-msgstr "Bytes por segmento WAL: %u\n"
-
-#: pg_resetxlog.c:683
-#, c-format
-msgid "Maximum length of identifiers: %u\n"
-msgstr "Longitud máxima de identificadores: %u\n"
-
-#: pg_resetxlog.c:685
-#, c-format
-msgid "Maximum columns in an index: %u\n"
-msgstr "Máximo número de columnas en un índice: %u\n"
-
-#: pg_resetxlog.c:687
-#, c-format
-msgid "Maximum size of a TOAST chunk: %u\n"
-msgstr "Longitud máxima de un trozo TOAST: %u\n"
-
-#: pg_resetxlog.c:689
-#, c-format
-msgid "Size of a large-object chunk: %u\n"
-msgstr "Longitud máxima de un trozo de objeto grande: %u\n"
-
-#: pg_resetxlog.c:691
-#, c-format
-msgid "Date/time type storage: %s\n"
-msgstr "Tipo de almacenamiento hora/fecha: %s\n"
-
-#: pg_resetxlog.c:692
-msgid "64-bit integers"
-msgstr "enteros de 64 bits"
-
-#: pg_resetxlog.c:692
-msgid "floating-point numbers"
-msgstr "números de coma flotante"
-
-#: pg_resetxlog.c:693
-#, c-format
-msgid "Float4 argument passing: %s\n"
-msgstr "Paso de parámetros float4: %s\n"
-
-#: pg_resetxlog.c:694 pg_resetxlog.c:696
-msgid "by reference"
-msgstr "por referencia"
-
-#: pg_resetxlog.c:694 pg_resetxlog.c:696
-msgid "by value"
-msgstr "por valor"
-
-#: pg_resetxlog.c:695
-#, c-format
-msgid "Float8 argument passing: %s\n"
-msgstr "Paso de parámetros float8: %s\n"
-
-#: pg_resetxlog.c:697
-#, c-format
-msgid "Data page checksum version: %u\n"
-msgstr "Versión de suma de verificación de datos: %u\n"
-
-#: pg_resetxlog.c:711
-#, c-format
-msgid ""
-"\n"
-"\n"
-"Values to be changed:\n"
-"\n"
-msgstr ""
-"\n"
-"\n"
-"Valores a cambiar:\n"
-"\n"
-
-#: pg_resetxlog.c:714
-#, c-format
-msgid "First log segment after reset: %s\n"
-msgstr "Primer segmento de log después de reiniciar: %s\n"
-
-#: pg_resetxlog.c:718
-#, c-format
-msgid "NextMultiXactId: %u\n"
-msgstr "NextMultiXactId: %u\n"
-
-#: pg_resetxlog.c:720
-#, c-format
-msgid "OldestMultiXid: %u\n"
-msgstr "OldestMultiXid: %u\n"
-
-#: pg_resetxlog.c:722
-#, c-format
-msgid "OldestMulti's DB: %u\n"
-msgstr "Base de datos del OldestMulti: %u\n"
-
-#: pg_resetxlog.c:728
-#, c-format
-msgid "NextMultiOffset: %u\n"
-msgstr "NextMultiOffset: %u\n"
-
-#: pg_resetxlog.c:734
-#, c-format
-msgid "NextOID: %u\n"
-msgstr "NextOID: %u\n"
-
-#: pg_resetxlog.c:740
-#, c-format
-msgid "NextXID: %u\n"
-msgstr "NextXID: %u\n"
-
-#: pg_resetxlog.c:742
-#, c-format
-msgid "OldestXID: %u\n"
-msgstr "OldestXID: %u\n"
-
-#: pg_resetxlog.c:744
-#, c-format
-msgid "OldestXID's DB: %u\n"
-msgstr "Base de datos del OldestXID: %u\n"
-
-#: pg_resetxlog.c:750
-#, c-format
-msgid "NextXID epoch: %u\n"
-msgstr "Epoch del NextXID: %u\n"
-
-#: pg_resetxlog.c:756
-#, c-format
-msgid "oldestCommitTsXid: %u\n"
-msgstr "oldestCommitTsXid: %u\n"
-
-#: pg_resetxlog.c:761
-#, c-format
-msgid "newestCommitTsXid: %u\n"
-msgstr "newestCommitTsXid: %u\n"
-
-#: pg_resetxlog.c:827
-#, c-format
-msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
-msgstr "%s: error interno -- sizeof(ControlFileData) es demasiado grande ... corrija PG_CONTROL_SIZE\n"
-
-#: pg_resetxlog.c:842
-#, c-format
-msgid "%s: could not create pg_control file: %s\n"
-msgstr "%s: no se pudo crear el archivo pg_control: %s\n"
-
-#: pg_resetxlog.c:853
-#, c-format
-msgid "%s: could not write pg_control file: %s\n"
-msgstr "%s: no se pudo escribir el archivo pg_control: %s\n"
-
-#: pg_resetxlog.c:860 pg_resetxlog.c:1156
-#, c-format
-msgid "%s: fsync error: %s\n"
-msgstr "%s: error de fsync: %s\n"
-
-#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
-#, c-format
-msgid "%s: could not open directory \"%s\": %s\n"
-msgstr "%s: no se pudo abrir el directorio «%s»: %s\n"
-
-#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
-#, c-format
-msgid "%s: could not read directory \"%s\": %s\n"
-msgstr "%s: no se pudo leer el directorio «%s»: %s\n"
-
-#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
-#, c-format
-msgid "%s: could not close directory \"%s\": %s\n"
-msgstr "%s: no se pudo cerrar el directorio «%s»: %s\n"
-
-#: pg_resetxlog.c:984 pg_resetxlog.c:1038
-#, c-format
-msgid "%s: could not delete file \"%s\": %s\n"
-msgstr "%s: no se pudo borrar el archivo «%s»: %s\n"
-
-#: pg_resetxlog.c:1123
-#, c-format
-msgid "%s: could not open file \"%s\": %s\n"
-msgstr "%s: no se pudo abrir el archivo «%s»: %s\n"
-
-#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
-#, c-format
-msgid "%s: could not write file \"%s\": %s\n"
-msgstr "%s: no se pudo escribir en el archivo «%s»: %s\n"
-
-#: pg_resetxlog.c:1167
-#, c-format
-msgid ""
-"%s resets the PostgreSQL transaction log.\n"
-"\n"
-msgstr ""
-"%s reinicia la bitácora de transacciones de PostgreSQL.\n"
-"\n"
-
-#: pg_resetxlog.c:1168
-#, c-format
-msgid ""
-"Usage:\n"
-" %s [OPTION]... DATADIR\n"
-"\n"
-msgstr ""
-"Uso:\n"
-" %s [OPCIÓN]... DATADIR\n"
-"\n"
-
-#: pg_resetxlog.c:1169
-#, c-format
-msgid "Options:\n"
-msgstr "Opciones:\n"
-
-#: pg_resetxlog.c:1170
-#, c-format
-msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
-msgstr ""
-" -c XID,XID asigna los ID de transacciones más antiguo y más nuevo que llevan\n"
-" timestamp de commit\n"
-
-#: pg_resetxlog.c:1171
-#, c-format
-msgid " (zero in either value means no change)\n"
-msgstr " (cero en cualquiera de ellos significa no cambiar)\n"
-
-#: pg_resetxlog.c:1172
-#, c-format
-msgid " [-D] DATADIR data directory\n"
-msgstr " [-D] DATADIR directorio de datos\n"
-
-#: pg_resetxlog.c:1173
-#, c-format
-msgid " -e XIDEPOCH set next transaction ID epoch\n"
-msgstr " -e XIDEPOCH asigna el siguiente «epoch» de ID de transacción\n"
-
-#: pg_resetxlog.c:1174
-#, c-format
-msgid " -f force update to be done\n"
-msgstr " -f fuerza que la actualización sea hecha\n"
-
-#: pg_resetxlog.c:1175
-#, c-format
-msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
-msgstr ""
-" -l XLOGFILE fuerza una posición mínima de inicio de WAL para una\n"
-" nueva transacción\n"
-
-#: pg_resetxlog.c:1176
-#, c-format
-msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
-msgstr " -m MXID,MXID asigna el siguiente ID de multitransacción y el más antiguo\n"
-
-#: pg_resetxlog.c:1177
-#, c-format
-msgid " -n no update, just show what would be done (for testing)\n"
-msgstr " -n no actualiza, sólo muestra lo que va a hacer (para pruebas)\n"
-
-#: pg_resetxlog.c:1178
-#, c-format
-msgid " -o OID set next OID\n"
-msgstr " -o OID asigna el siguiente OID\n"
-
-#: pg_resetxlog.c:1179
-#, c-format
-msgid " -O OFFSET set next multitransaction offset\n"
-msgstr " -O OFFSET asigna la siguiente posición de multitransacción\n"
-
-#: pg_resetxlog.c:1180
-#, c-format
-msgid " -V, --version output version information, then exit\n"
-msgstr " -V, --version muestra información de la versión, luego sale\n"
-
-#: pg_resetxlog.c:1181
-#, c-format
-msgid " -x XID set next transaction ID\n"
-msgstr " -x XID asigna el siguiente ID de transacción\n"
-
-#: pg_resetxlog.c:1182
-#, c-format
-msgid " -?, --help show this help, then exit\n"
-msgstr " -?, --help muestra esta ayuda, luego sale\n"
-
-#: pg_resetxlog.c:1183
-#, c-format
-msgid ""
-"\n"
-"Report bugs to .\n"
-msgstr ""
-"\n"
-"Reporte errores a .\n"
diff --git a/src/bin/pg_resetxlog/po/fr.po b/src/bin/pg_resetxlog/po/fr.po
deleted file mode 100644
index 1a9a57f77f..0000000000
--- a/src/bin/pg_resetxlog/po/fr.po
+++ /dev/null
@@ -1,644 +0,0 @@
-# translation of pg_resetxlog.po to fr_fr
-# french message translation file for pg_resetxlog
-#
-# Use these quotes: « %s »
-#
-# Guillaume Lelarge , 2003-2009.
-# Stéphane Schildknecht , 2009.
-msgid ""
-msgstr ""
-"Project-Id-Version: PostgreSQL 9.6\n"
-"Report-Msgid-Bugs-To: pgsql-bugs@postgresql.org\n"
-"POT-Creation-Date: 2016-05-08 21:14+0000\n"
-"PO-Revision-Date: 2016-05-09 10:28+0200\n"
-"Last-Translator: Guillaume Lelarge \n"
-"Language-Team: PostgreSQLfr \n"
-"Language: fr\n"
-"MIME-Version: 1.0\n"
-"Content-Type: text/plain; charset=ISO-8859-15\n"
-"Content-Transfer-Encoding: 8bit\n"
-"X-Generator: Poedit 1.8.7.1\n"
-
-#: ../../common/restricted_token.c:68
-#, c-format
-msgid "%s: WARNING: cannot create restricted tokens on this platform\n"
-msgstr "%s : ATTENTION : ne peut pas cr�r les jetons restreints sur cette plateforme\n"
-
-#: ../../common/restricted_token.c:77
-#, c-format
-msgid "%s: could not open process token: error code %lu\n"
-msgstr "%s : n'a pas pu ouvrir le jeton du processus : code d'erreur %lu\n"
-
-#: ../../common/restricted_token.c:90
-#, c-format
-msgid "%s: could not allocate SIDs: error code %lu\n"
-msgstr "%s : n'a pas pu allouer les SID : code d'erreur %lu\n"
-
-#: ../../common/restricted_token.c:110
-#, c-format
-msgid "%s: could not create restricted token: error code %lu\n"
-msgstr "%s : n'a pas pu cr�er le jeton restreint : code d'erreur %lu\n"
-
-#: ../../common/restricted_token.c:132
-#, c-format
-msgid "%s: could not start process for command \"%s\": error code %lu\n"
-msgstr "%s : n'a pas pu d�marrer le processus pour la commande � %s � : code d'erreur %lu\n"
-
-#: ../../common/restricted_token.c:170
-#, c-format
-msgid "%s: could not re-execute with restricted token: error code %lu\n"
-msgstr "%s : n'a pas pu r�-ex�cuter le jeton restreint : code d'erreur %lu\n"
-
-#: ../../common/restricted_token.c:186
-#, c-format
-msgid "%s: could not get exit code from subprocess: error code %lu\n"
-msgstr "%s : n'a pas pu r�cup�rer le code de statut du sous-processus : code d'erreur %lu\n"
-
-#. translator: the second %s is a command line argument (-e, etc)
-#: pg_resetxlog.c:140 pg_resetxlog.c:155 pg_resetxlog.c:170 pg_resetxlog.c:177
-#: pg_resetxlog.c:201 pg_resetxlog.c:216 pg_resetxlog.c:224 pg_resetxlog.c:250
-#: pg_resetxlog.c:264
-#, c-format
-msgid "%s: invalid argument for option %s\n"
-msgstr "%s : argument invalide pour l'option %s\n"
-
-#: pg_resetxlog.c:141 pg_resetxlog.c:156 pg_resetxlog.c:171 pg_resetxlog.c:178
-#: pg_resetxlog.c:202 pg_resetxlog.c:217 pg_resetxlog.c:225 pg_resetxlog.c:251
-#: pg_resetxlog.c:265 pg_resetxlog.c:272 pg_resetxlog.c:285 pg_resetxlog.c:293
-#, c-format
-msgid "Try \"%s --help\" for more information.\n"
-msgstr "Essayer � %s --help � pour plus d'informations.\n"
-
-#: pg_resetxlog.c:146
-#, c-format
-msgid "%s: transaction ID epoch (-e) must not be -1\n"
-msgstr ""
-"%s : la valeur epoch de l'identifiant de transaction (-e) ne doit pas �tre\n"
-"-1\n"
-
-#: pg_resetxlog.c:161
-#, c-format
-msgid "%s: transaction ID (-x) must not be 0\n"
-msgstr "%s : l'identifiant de la transaction (-x) ne doit pas �tre 0\n"
-
-#: pg_resetxlog.c:185 pg_resetxlog.c:192
-#, c-format
-msgid "%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"
-msgstr "%s : l'identifiant de transaction (-c) doit �tre 0 ou sup�rieur ou �gal � 2\n"
-
-#: pg_resetxlog.c:207
-#, c-format
-msgid "%s: OID (-o) must not be 0\n"
-msgstr "%s : l'OID (-o) ne doit pas �tre 0\n"
-
-#: pg_resetxlog.c:230
-#, c-format
-msgid "%s: multitransaction ID (-m) must not be 0\n"
-msgstr "%s : l'identifiant de multi-transaction (-m) ne doit pas �tre 0\n"
-
-#: pg_resetxlog.c:240
-#, c-format
-msgid "%s: oldest multitransaction ID (-m) must not be 0\n"
-msgstr "%s : l'identifiant de multi-transaction le plus ancien (-m) ne doit pas �tre 0\n"
-
-#: pg_resetxlog.c:256
-#, c-format
-msgid "%s: multitransaction offset (-O) must not be -1\n"
-msgstr "%s : le d�calage de multi-transaction (-O) ne doit pas �tre -1\n"
-
-#: pg_resetxlog.c:283
-#, c-format
-msgid "%s: too many command-line arguments (first is \"%s\")\n"
-msgstr "%s : trop d'arguments en ligne de commande (le premier �tant � %s �)\n"
-
-#: pg_resetxlog.c:292
-#, c-format
-msgid "%s: no data directory specified\n"
-msgstr "%s : aucun r�pertoire de donn�es indiqu�\n"
-
-#: pg_resetxlog.c:306
-#, c-format
-msgid "%s: cannot be executed by \"root\"\n"
-msgstr "%s : ne peut pas �tre ex�cut� par � root �\n"
-
-#: pg_resetxlog.c:308
-#, c-format
-msgid "You must run %s as the PostgreSQL superuser.\n"
-msgstr "Vous devez ex�cuter %s en tant que super-utilisateur PostgreSQL.\n"
-
-#: pg_resetxlog.c:318
-#, c-format
-msgid "%s: could not change directory to \"%s\": %s\n"
-msgstr "%s : n'a pas pu acc�der au r�pertoire � %s � : %s\n"
-
-#: pg_resetxlog.c:331 pg_resetxlog.c:477
-#, c-format
-msgid "%s: could not open file \"%s\" for reading: %s\n"
-msgstr "%s : n'a pas pu ouvrir le fichier � %s � en lecture : %s\n"
-
-#: pg_resetxlog.c:338
-#, c-format
-msgid ""
-"%s: lock file \"%s\" exists\n"
-"Is a server running? If not, delete the lock file and try again.\n"
-msgstr ""
-"%s : le verrou � %s � existe\n"
-"Le serveur est-il d�marr� ? Sinon, supprimer le fichier verrou et r�essayer.\n"
-
-#: pg_resetxlog.c:425
-#, c-format
-msgid ""
-"\n"
-"If these values seem acceptable, use -f to force reset.\n"
-msgstr ""
-"\n"
-"Si ces valeurs semblent acceptables, utiliser -f pour forcer la\n"
-"r�initialisation.\n"
-
-#: pg_resetxlog.c:437
-#, c-format
-msgid ""
-"The database server was not shut down cleanly.\n"
-"Resetting the transaction log might cause data to be lost.\n"
-"If you want to proceed anyway, use -f to force reset.\n"
-msgstr ""
-"Le serveur de bases de donn�es n'a pas �t� arr�t� proprement.\n"
-"R�-initialiser le journal des transactions peut occasionner des pertes de\n"
-"donn�es.\n"
-"Pour continuer malgr� tout, utiliser -f pour forcer la\n"
-"r�initialisation.\n"
-
-#: pg_resetxlog.c:451
-#, c-format
-msgid "Transaction log reset\n"
-msgstr "R�initialisation du journal des transactions\n"
-
-#: pg_resetxlog.c:480
-#, c-format
-msgid ""
-"If you are sure the data directory path is correct, execute\n"
-" touch %s\n"
-"and try again.\n"
-msgstr ""
-"Si le chemin du r�pertoire de donn�es est correct, ex�cuter\n"
-" touch %s\n"
-"et r�essayer.\n"
-
-#: pg_resetxlog.c:493
-#, c-format
-msgid "%s: could not read file \"%s\": %s\n"
-msgstr "%s : n'a pas pu lire le fichier � %s � : %s\n"
-
-#: pg_resetxlog.c:516
-#, c-format
-msgid "%s: pg_control exists but has invalid CRC; proceed with caution\n"
-msgstr "%s : pg_control existe mais son CRC est invalide ; agir avec pr�caution\n"
-
-#: pg_resetxlog.c:525
-#, c-format
-msgid "%s: pg_control exists but is broken or unknown version; ignoring it\n"
-msgstr "%s : pg_control existe mais est corrompu ou de version inconnue ; ignor�\n"
-
-#: pg_resetxlog.c:628
-#, c-format
-msgid ""
-"Guessed pg_control values:\n"
-"\n"
-msgstr ""
-"Valeurs de pg_control devin�es :\n"
-"\n"
-
-#: pg_resetxlog.c:630
-#, c-format
-msgid ""
-"Current pg_control values:\n"
-"\n"
-msgstr ""
-"Valeurs actuelles de pg_control :\n"
-"\n"
-
-#: pg_resetxlog.c:639
-#, c-format
-msgid "pg_control version number: %u\n"
-msgstr "Num�ro de version de pg_control : %u\n"
-
-#: pg_resetxlog.c:641
-#, c-format
-msgid "Catalog version number: %u\n"
-msgstr "Num�ro de version du catalogue : %u\n"
-
-#: pg_resetxlog.c:643
-#, c-format
-msgid "Database system identifier: %s\n"
-msgstr "Identifiant du syst�me de base de donn�es : %s\n"
-
-#: pg_resetxlog.c:645
-#, c-format
-msgid "Latest checkpoint's TimeLineID: %u\n"
-msgstr "Dernier TimeLineID du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:647
-#, c-format
-msgid "Latest checkpoint's full_page_writes: %s\n"
-msgstr "Dernier full_page_writes du point de contr�le : %s\n"
-
-#: pg_resetxlog.c:648
-msgid "off"
-msgstr "d�sactiv�"
-
-#: pg_resetxlog.c:648
-msgid "on"
-msgstr "activ�"
-
-#: pg_resetxlog.c:649
-#, c-format
-msgid "Latest checkpoint's NextXID: %u:%u\n"
-msgstr "Dernier NextXID du point de contr�le : %u:%u\n"
-
-#: pg_resetxlog.c:652
-#, c-format
-msgid "Latest checkpoint's NextOID: %u\n"
-msgstr "Dernier NextOID du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:654
-#, c-format
-msgid "Latest checkpoint's NextMultiXactId: %u\n"
-msgstr "Dernier NextMultiXactId du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:656
-#, c-format
-msgid "Latest checkpoint's NextMultiOffset: %u\n"
-msgstr "Dernier NextMultiOffset du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:658
-#, c-format
-msgid "Latest checkpoint's oldestXID: %u\n"
-msgstr "Dernier oldestXID du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:660
-#, c-format
-msgid "Latest checkpoint's oldestXID's DB: %u\n"
-msgstr "Dernier oldestXID du point de contr�le de la base : %u\n"
-
-#: pg_resetxlog.c:662
-#, c-format
-msgid "Latest checkpoint's oldestActiveXID: %u\n"
-msgstr "Dernier oldestActiveXID du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:664
-#, c-format
-msgid "Latest checkpoint's oldestMultiXid: %u\n"
-msgstr "Dernier oldestMultiXID du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:666
-#, c-format
-msgid "Latest checkpoint's oldestMulti's DB: %u\n"
-msgstr "Dernier oldestMulti du point de contr�le de la base : %u\n"
-
-#: pg_resetxlog.c:668
-#, c-format
-msgid "Latest checkpoint's oldestCommitTsXid:%u\n"
-msgstr "Dernier oldestCommitTsXid du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:670
-#, c-format
-msgid "Latest checkpoint's newestCommitTsXid:%u\n"
-msgstr "Dernier newestCommitTsXid du point de contr�le : %u\n"
-
-#: pg_resetxlog.c:672
-#, c-format
-msgid "Maximum data alignment: %u\n"
-msgstr "Alignement maximal des donn�es : %u\n"
-
-#: pg_resetxlog.c:675
-#, c-format
-msgid "Database block size: %u\n"
-msgstr "Taille du bloc de la base de donn�es : %u\n"
-
-#: pg_resetxlog.c:677
-#, c-format
-msgid "Blocks per segment of large relation: %u\n"
-msgstr "Blocs par segment des relations volumineuses : %u\n"
-
-#: pg_resetxlog.c:679
-#, c-format
-msgid "WAL block size: %u\n"
-msgstr "Taille de bloc du journal de transaction : %u\n"
-
-#: pg_resetxlog.c:681
-#, c-format
-msgid "Bytes per WAL segment: %u\n"
-msgstr "Octets par segment du journal de transaction : %u\n"
-
-#: pg_resetxlog.c:683
-#, c-format
-msgid "Maximum length of identifiers: %u\n"
-msgstr "Longueur maximale des identifiants : %u\n"
-
-#: pg_resetxlog.c:685
-#, c-format
-msgid "Maximum columns in an index: %u\n"
-msgstr "Nombre maximal de colonnes d'un index: %u\n"
-
-#: pg_resetxlog.c:687
-#, c-format
-msgid "Maximum size of a TOAST chunk: %u\n"
-msgstr "Longueur maximale d'un morceau TOAST : %u\n"
-
-#: pg_resetxlog.c:689
-#, c-format
-msgid "Size of a large-object chunk: %u\n"
-msgstr "Taille d'un morceau de Large Object : %u\n"
-
-#: pg_resetxlog.c:691
-#, c-format
-msgid "Date/time type storage: %s\n"
-msgstr "Stockage du type date/heure : %s\n"
-
-#: pg_resetxlog.c:692
-msgid "64-bit integers"
-msgstr "entiers 64-bits"
-
-#: pg_resetxlog.c:692
-msgid "floating-point numbers"
-msgstr "nombres � virgule flottante"
-
-#: pg_resetxlog.c:693
-#, c-format
-msgid "Float4 argument passing: %s\n"
-msgstr "Passage d'argument float4 : %s\n"
-
-#: pg_resetxlog.c:694 pg_resetxlog.c:696
-msgid "by reference"
-msgstr "par r�f�rence"
-
-#: pg_resetxlog.c:694 pg_resetxlog.c:696
-msgid "by value"
-msgstr "par valeur"
-
-#: pg_resetxlog.c:695
-#, c-format
-msgid "Float8 argument passing: %s\n"
-msgstr "Passage d'argument float8 : %s\n"
-
-#: pg_resetxlog.c:697
-#, c-format
-msgid "Data page checksum version: %u\n"
-msgstr "Version des sommes de contr�le des pages de donn�es : %u\n"
-
-#: pg_resetxlog.c:711
-#, c-format
-msgid ""
-"\n"
-"\n"
-"Values to be changed:\n"
-"\n"
-msgstr ""
-"\n"
-"\n"
-"Valeurs � changer :\n"
-"\n"
-
-#: pg_resetxlog.c:714
-#, c-format
-msgid "First log segment after reset: %s\n"
-msgstr "Premier segment du journal apr�s r�initialisation : %s\n"
-
-#: pg_resetxlog.c:718
-#, c-format
-msgid "NextMultiXactId: %u\n"
-msgstr "NextMultiXactId: %u\n"
-
-#: pg_resetxlog.c:720
-#, c-format
-msgid "OldestMultiXid: %u\n"
-msgstr "OldestMultiXid: %u\n"
-
-#: pg_resetxlog.c:722
-#, c-format
-msgid "OldestMulti's DB: %u\n"
-msgstr "OldestMulti's DB: %u\n"
-
-#: pg_resetxlog.c:728
-#, c-format
-msgid "NextMultiOffset: %u\n"
-msgstr "NextMultiOffset: %u\n"
-
-#: pg_resetxlog.c:734
-#, c-format
-msgid "NextOID: %u\n"
-msgstr "NextOID: %u\n"
-
-#: pg_resetxlog.c:740
-#, c-format
-msgid "NextXID: %u\n"
-msgstr "NextXID: %u\n"
-
-#: pg_resetxlog.c:742
-#, c-format
-msgid "OldestXID: %u\n"
-msgstr "OldestXID: %u\n"
-
-#: pg_resetxlog.c:744
-#, c-format
-msgid "OldestXID's DB: %u\n"
-msgstr "OldestXID's DB: %u\n"
-
-#: pg_resetxlog.c:750
-#, c-format
-msgid "NextXID epoch: %u\n"
-msgstr "NextXID Epoch: %u\n"
-
-#: pg_resetxlog.c:756
-#, c-format
-msgid "oldestCommitTsXid: %u\n"
-msgstr "oldestCommitTsXid: %u\n"
-
-#: pg_resetxlog.c:761
-#, c-format
-msgid "newestCommitTsXid: %u\n"
-msgstr "newestCommitTsXid: %u\n"
-
-#: pg_resetxlog.c:827
-#, c-format
-msgid "%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"
-msgstr ""
-"%s : erreur interne -- sizeof(ControlFileData) est trop important...\n"
-"corrigez PG_CONTROL_SIZE\n"
-
-#: pg_resetxlog.c:842
-#, c-format
-msgid "%s: could not create pg_control file: %s\n"
-msgstr "%s : n'a pas pu cr�er le fichier pg_control : %s\n"
-
-#: pg_resetxlog.c:853
-#, c-format
-msgid "%s: could not write pg_control file: %s\n"
-msgstr "%s : n'a pas pu �crire le fichier pg_control : %s\n"
-
-#: pg_resetxlog.c:860 pg_resetxlog.c:1156
-#, c-format
-msgid "%s: fsync error: %s\n"
-msgstr "%s : erreur fsync : %s\n"
-
-#: pg_resetxlog.c:900 pg_resetxlog.c:971 pg_resetxlog.c:1022
-#, c-format
-msgid "%s: could not open directory \"%s\": %s\n"
-msgstr "%s : n'a pas pu ouvrir le r�pertoire � %s � : %s\n"
-
-#: pg_resetxlog.c:936 pg_resetxlog.c:993 pg_resetxlog.c:1047
-#, c-format
-msgid "%s: could not read directory \"%s\": %s\n"
-msgstr "%s : n'a pas pu lire le r�pertoire � %s � : %s\n"
-
-#: pg_resetxlog.c:943 pg_resetxlog.c:1000 pg_resetxlog.c:1054
-#, c-format
-msgid "%s: could not close directory \"%s\": %s\n"
-msgstr "%s : n'a pas pu fermer le r�pertoire � %s � : %s\n"
-
-#: pg_resetxlog.c:984 pg_resetxlog.c:1038
-#, c-format
-msgid "%s: could not delete file \"%s\": %s\n"
-msgstr "%s : n'a pas pu supprimer le fichier � %s � : %s\n"
-
-#: pg_resetxlog.c:1123
-#, c-format
-msgid "%s: could not open file \"%s\": %s\n"
-msgstr "%s : n'a pas pu ouvrir le fichier � %s � : %s\n"
-
-#: pg_resetxlog.c:1134 pg_resetxlog.c:1148
-#, c-format
-msgid "%s: could not write file \"%s\": %s\n"
-msgstr "%s : n'a pas pu �crire le fichier � %s � : %s\n"
-
-#: pg_resetxlog.c:1167
-#, c-format
-msgid ""
-"%s resets the PostgreSQL transaction log.\n"
-"\n"
-msgstr ""
-"%s r�initialise le journal des transactions PostgreSQL.\n"
-"\n"
-
-#: pg_resetxlog.c:1168
-#, c-format
-msgid ""
-"Usage:\n"
-" %s [OPTION]... DATADIR\n"
-"\n"
-msgstr ""
-"Usage :\n"
-" %s [OPTION]... R�P_DONN�ES\n"
-"\n"
-
-#: pg_resetxlog.c:1169
-#, c-format
-msgid "Options:\n"
-msgstr "Options :\n"
-
-#: pg_resetxlog.c:1170
-#, c-format
-msgid " -c XID,XID set oldest and newest transactions bearing commit timestamp\n"
-msgstr " -c XID,XID configure la plus ancienne et la plus r�cente transaction\n"
-
-#: pg_resetxlog.c:1171
-#, c-format
-msgid " (zero in either value means no change)\n"
-msgstr " (z�ro dans l'une des deux valeurs signifie aucun changement)\n"
-
-#: pg_resetxlog.c:1172
-#, c-format
-msgid " [-D] DATADIR data directory\n"
-msgstr " [-D] R�PDONNEES r�pertoire de la base de donn�es\n"
-
-#: pg_resetxlog.c:1173
-#, c-format
-msgid " -e XIDEPOCH set next transaction ID epoch\n"
-msgstr ""
-" -e XIDEPOCH fixe la valeur epoch du prochain identifiant de\n"
-" transaction\n"
-
-#: pg_resetxlog.c:1174
-#, c-format
-msgid " -f force update to be done\n"
-msgstr " -f force la mise � jour\n"
-
-#: pg_resetxlog.c:1175
-#, c-format
-msgid " -l XLOGFILE force minimum WAL starting location for new transaction log\n"
-msgstr ""
-" -l FICHIERXLOG force l'emplacement minimal de d�but des WAL du nouveau\n"
-" journal de transactions\n"
-
-#: pg_resetxlog.c:1176
-#, c-format
-msgid " -m MXID,MXID set next and oldest multitransaction ID\n"
-msgstr " -m MXID,MXID fixe le prochain identifiant multi-transaction\n"
-
-#: pg_resetxlog.c:1177
-#, c-format
-msgid " -n no update, just show what would be done (for testing)\n"
-msgstr ""
-" -n pas de mise � jour, affiche simplement ce qui sera fait\n"
-" (pour test)\n"
-
-#: pg_resetxlog.c:1178
-#, c-format
-msgid " -o OID set next OID\n"
-msgstr " -o OID fixe le prochain OID\n"
-
-#: pg_resetxlog.c:1179
-#, c-format
-msgid " -O OFFSET set next multitransaction offset\n"
-msgstr " -O D�CALAGE fixe le d�calage de la prochaine multi-transaction\n"
-
-#: pg_resetxlog.c:1180
-#, c-format
-msgid " -V, --version output version information, then exit\n"
-msgstr " -V, --version affiche la version et quitte\n"
-
-#: pg_resetxlog.c:1181
-#, c-format
-msgid " -x XID set next transaction ID\n"
-msgstr " -x XID fixe le prochain identifiant de transaction\n"
-
-#: pg_resetxlog.c:1182
-#, c-format
-msgid " -?, --help show this help, then exit\n"
-msgstr " -?, --help affiche cette aide et quitte\n"
-
-#: pg_resetxlog.c:1183
-#, c-format
-msgid ""
-"\n"
-"Report bugs to .\n"
-msgstr ""
-"\n"
-"Rapporter les bogues � .\n"
-
-#~ msgid "%s: invalid argument for option -x\n"
-#~ msgstr "%s : argument invalide pour l'option -x\n"
-
-#~ msgid "%s: invalid argument for option -o\n"
-#~ msgstr "%s : argument invalide pour l'option -o\n"
-
-#~ msgid "%s: invalid argument for option -m\n"
-#~ msgstr "%s : argument invalide pour l'option -m\n"
-
-#~ msgid "%s: invalid argument for option -O\n"
-#~ msgstr "%s : argument invalide pour l'option -O\n"
-
-#~ msgid "%s: invalid argument for option -l\n"
-#~ msgstr "%s : argument invalide pour l'option -l\n"
-
-#~ msgid "%s: could not read from directory \"%s\": %s\n"
-#~ msgstr "%s : n'a pas pu lire le r�pertoire � %s � : %s\n"
-
-#~ msgid " --help show this help, then exit\n"
-#~ msgstr " --help affiche cette aide et quitte\n"
-
-#~ msgid " --version output version information, then exit\n"
-#~ msgstr " --version afficherla version et quitte\n"
-
-#~ msgid "First log file ID after reset: %u\n"
-#~ msgstr "Premier identifiant du journal apr�s r�initialisation : %u\n"
diff --git a/src/bin/pg_resetxlog/po/it.po b/src/bin/pg_resetxlog/po/it.po
deleted file mode 100644
index 9fc4479060..0000000000
--- a/src/bin/pg_resetxlog/po/it.po
+++ /dev/null
@@ -1,620 +0,0 @@
-#
-# Translation of pg_resetxlog to Italian
-# PostgreSQL Project
-#
-# Associazione Culturale ITPUG - Italian PostgreSQL Users Group
-# http://www.itpug.org/ - info@itpug.org
-#
-# Traduttori:
-# * Diego Cinelli
-# * Daniele Varrazzo
-#
-# Revisori:
-# * Emanuele Zamprogno
-#
-# Traduttori precedenti:
-# * Fabrizio Mazzoni
-# * Mirko Tebaldi