From 2c8f4836db058d0715bc30a30655d646287ba509 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Fri, 8 May 2015 00:20:46 +0200
Subject: Represent columns requiring insert and update privileges indentently.
Previously, relation range table entries used a single Bitmapset field
representing which columns required either UPDATE or INSERT privileges,
despite the fact that INSERT and UPDATE privileges are separately
cataloged, and may be independently held. As statements so far required
either insert or update privileges but never both, that was
sufficient. The required permission could be inferred from the top level
statement run.
The upcoming INSERT ... ON CONFLICT UPDATE feature needs to
independently check for both privileges in one statement though, so that
is not sufficient anymore.
Bumps catversion as stored rules change.
Author: Peter Geoghegan
Reviewed-By: Andres Freund
---
contrib/postgres_fdw/postgres_fdw.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 478e12484b..de732319d7 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -1205,7 +1205,7 @@ postgresPlanForeignModify(PlannerInfo *root,
int col;
col = -1;
- while ((col = bms_next_member(rte->modifiedCols, col)) >= 0)
+ while ((col = bms_next_member(rte->updatedCols, col)) >= 0)
{
/* bit numbers are offset by FirstLowInvalidHeapAttributeNumber */
AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber;
--
cgit v1.2.3
From 168d5805e4c08bed7b95d351bf097cff7c07dd65 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Fri, 8 May 2015 05:31:36 +0200
Subject: Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to
raising a unique or exclusion constraint violation error when inserting.
ON CONFLICT refers to constraints that can either be specified using a
inference clause (by specifying the columns of a unique constraint) or
by naming a unique or exclusion constraint. DO NOTHING avoids the
constraint violation, without touching the pre-existing row. DO UPDATE
SET ... [WHERE ...] updates the pre-existing tuple, and has access to
both the tuple proposed for insertion and the existing tuple; the
optional WHERE clause can be used to prevent an update from being
executed. The UPDATE SET and WHERE clauses have access to the tuple
proposed for insertion using the "magic" EXCLUDED alias, and to the
pre-existing tuple using the table name or its alias.
This feature is often referred to as upsert.
This is implemented using a new infrastructure called "speculative
insertion". It is an optimistic variant of regular insertion that first
does a pre-check for existing tuples and then attempts an insert. If a
violating tuple was inserted concurrently, the speculatively inserted
tuple is deleted and a new attempt is made. If the pre-check finds a
matching tuple the alternative DO NOTHING or DO UPDATE action is taken.
If the insertion succeeds without detecting a conflict, the tuple is
deemed inserted.
To handle the possible ambiguity between the excluded alias and a table
named excluded, and for convenience with long relation names, INSERT
INTO now can alias its target table.
Bumps catversion as stored rules change.
Author: Peter Geoghegan, with significant contributions from Heikki
Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes.
Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs,
Dean Rasheed, Stephen Frost and many others.
---
contrib/pg_stat_statements/pg_stat_statements.c | 25 ++
contrib/postgres_fdw/deparse.c | 7 +-
contrib/postgres_fdw/expected/postgres_fdw.out | 5 +
contrib/postgres_fdw/postgres_fdw.c | 15 +-
contrib/postgres_fdw/postgres_fdw.h | 2 +-
contrib/postgres_fdw/sql/postgres_fdw.sql | 3 +
contrib/test_decoding/expected/ddl.out | 34 ++
contrib/test_decoding/expected/toast.out | 9 +-
contrib/test_decoding/sql/ddl.sql | 22 +
contrib/test_decoding/sql/toast.sql | 5 +
doc/src/sgml/fdwhandler.sgml | 7 +
doc/src/sgml/keywords.sgml | 7 +
doc/src/sgml/mvcc.sgml | 23 +-
doc/src/sgml/plpgsql.sgml | 14 +-
doc/src/sgml/postgres-fdw.sgml | 8 +
doc/src/sgml/protocol.sgml | 13 +-
doc/src/sgml/ref/create_policy.sgml | 63 ++-
doc/src/sgml/ref/create_rule.sgml | 6 +-
doc/src/sgml/ref/create_table.sgml | 4 +-
doc/src/sgml/ref/create_trigger.sgml | 5 +-
doc/src/sgml/ref/create_view.sgml | 9 +-
doc/src/sgml/ref/insert.sgml | 403 ++++++++++++++++-
doc/src/sgml/trigger.sgml | 48 ++-
src/backend/access/heap/heapam.c | 377 ++++++++++++++--
src/backend/access/heap/hio.c | 27 +-
src/backend/access/heap/tuptoaster.c | 8 +
src/backend/access/nbtree/nbtinsert.c | 28 +-
src/backend/access/rmgrdesc/heapdesc.c | 9 +
src/backend/catalog/index.c | 53 ++-
src/backend/catalog/indexing.c | 2 +-
src/backend/catalog/sql_features.txt | 2 +-
src/backend/commands/constraint.c | 2 +-
src/backend/commands/copy.c | 7 +-
src/backend/commands/explain.c | 70 ++-
src/backend/commands/trigger.c | 19 +-
src/backend/executor/execIndexing.c | 417 +++++++++++++++---
src/backend/executor/execMain.c | 53 ++-
src/backend/executor/nodeLockRows.c | 12 +-
src/backend/executor/nodeModifyTable.c | 459 +++++++++++++++++++-
src/backend/nodes/copyfuncs.c | 84 ++++
src/backend/nodes/equalfuncs.c | 62 +++
src/backend/nodes/nodeFuncs.c | 87 ++++
src/backend/nodes/outfuncs.c | 41 +-
src/backend/nodes/readfuncs.c | 40 ++
src/backend/optimizer/plan/createplan.c | 26 +-
src/backend/optimizer/plan/planner.c | 27 ++
src/backend/optimizer/plan/setrefs.c | 52 ++-
src/backend/optimizer/plan/subselect.c | 4 +
src/backend/optimizer/prep/prepjointree.c | 6 +
src/backend/optimizer/prep/preptlist.c | 13 +
src/backend/optimizer/util/plancat.c | 352 +++++++++++++++
src/backend/parser/analyze.c | 149 ++++++-
src/backend/parser/gram.y | 121 +++++-
src/backend/parser/parse_clause.c | 203 +++++++++
src/backend/parser/parse_collate.c | 2 +
src/backend/parser/parse_target.c | 11 +-
src/backend/replication/logical/decode.c | 66 ++-
src/backend/replication/logical/reorderbuffer.c | 159 +++++--
src/backend/rewrite/rewriteHandler.c | 87 +++-
src/backend/rewrite/rowsecurity.c | 82 +++-
src/backend/storage/lmgr/lmgr.c | 91 ++++
src/backend/tcop/pquery.c | 17 +-
src/backend/utils/adt/lockfuncs.c | 1 +
src/backend/utils/adt/ruleutils.c | 108 +++--
src/backend/utils/time/tqual.c | 29 +-
src/bin/psql/common.c | 5 +-
src/include/access/heapam.h | 3 +
src/include/access/heapam_xlog.h | 54 ++-
src/include/access/hio.h | 2 +-
src/include/access/htup_details.h | 36 +-
src/include/catalog/catversion.h | 2 +-
src/include/catalog/index.h | 2 +
src/include/executor/executor.h | 13 +-
src/include/nodes/execnodes.h | 15 +
src/include/nodes/nodes.h | 17 +
src/include/nodes/parsenodes.h | 45 +-
src/include/nodes/plannodes.h | 8 +
src/include/nodes/primnodes.h | 42 ++
src/include/optimizer/plancat.h | 2 +
src/include/optimizer/planmain.h | 2 +-
src/include/optimizer/prep.h | 3 +
src/include/parser/kwlist.h | 1 +
src/include/parser/parse_clause.h | 4 +
src/include/replication/reorderbuffer.h | 9 +-
src/include/rewrite/rowsecurity.h | 3 +-
src/include/storage/lmgr.h | 5 +
src/include/storage/lock.h | 10 +
src/include/utils/snapshot.h | 22 +-
.../expected/insert-conflict-do-nothing.out | 23 +
.../expected/insert-conflict-do-update-2.out | 23 +
.../expected/insert-conflict-do-update-3.out | 26 ++
.../expected/insert-conflict-do-update.out | 23 +
src/test/isolation/isolation_schedule | 4 +
.../specs/insert-conflict-do-nothing.spec | 41 ++
.../specs/insert-conflict-do-update-2.spec | 41 ++
.../specs/insert-conflict-do-update-3.spec | 69 +++
.../isolation/specs/insert-conflict-do-update.spec | 40 ++
src/test/regress/expected/errors.out | 4 +-
src/test/regress/expected/insert_conflict.out | 476 +++++++++++++++++++++
src/test/regress/expected/privileges.out | 29 +-
src/test/regress/expected/returning.out | 24 ++
src/test/regress/expected/rowsecurity.out | 132 ++++++
src/test/regress/expected/rules.out | 90 ++++
src/test/regress/expected/subselect.out | 22 +
src/test/regress/expected/triggers.out | 102 ++++-
src/test/regress/expected/updatable_views.out | 61 +++
src/test/regress/expected/update.out | 34 ++
src/test/regress/expected/with.out | 82 ++++
src/test/regress/input/constraints.source | 12 +
src/test/regress/output/constraints.source | 24 +-
src/test/regress/parallel_schedule | 1 +
src/test/regress/serial_schedule | 1 +
src/test/regress/sql/insert_conflict.sql | 284 ++++++++++++
src/test/regress/sql/privileges.sql | 19 +-
src/test/regress/sql/returning.sql | 6 +
src/test/regress/sql/rowsecurity.sql | 112 +++++
src/test/regress/sql/rules.sql | 59 +++
src/test/regress/sql/subselect.sql | 14 +
src/test/regress/sql/triggers.sql | 69 ++-
src/test/regress/sql/updatable_views.sql | 9 +
src/test/regress/sql/update.sql | 21 +
src/test/regress/sql/with.sql | 57 +++
122 files changed, 6106 insertions(+), 435 deletions(-)
create mode 100644 src/test/isolation/expected/insert-conflict-do-nothing.out
create mode 100644 src/test/isolation/expected/insert-conflict-do-update-2.out
create mode 100644 src/test/isolation/expected/insert-conflict-do-update-3.out
create mode 100644 src/test/isolation/expected/insert-conflict-do-update.out
create mode 100644 src/test/isolation/specs/insert-conflict-do-nothing.spec
create mode 100644 src/test/isolation/specs/insert-conflict-do-update-2.spec
create mode 100644 src/test/isolation/specs/insert-conflict-do-update-3.spec
create mode 100644 src/test/isolation/specs/insert-conflict-do-update.spec
create mode 100644 src/test/regress/expected/insert_conflict.out
create mode 100644 src/test/regress/sql/insert_conflict.sql
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 76d9e0a5ec..6abe3f0770 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2264,6 +2264,7 @@ JumbleQuery(pgssJumbleState *jstate, Query *query)
JumbleRangeTable(jstate, query->rtable);
JumbleExpr(jstate, (Node *) query->jointree);
JumbleExpr(jstate, (Node *) query->targetList);
+ JumbleExpr(jstate, (Node *) query->onConflict);
JumbleExpr(jstate, (Node *) query->returningList);
JumbleExpr(jstate, (Node *) query->groupClause);
JumbleExpr(jstate, query->havingQual);
@@ -2631,6 +2632,16 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
APP_JUMB(ce->cursor_param);
}
break;
+ case T_InferenceElem:
+ {
+ InferenceElem *ie = (InferenceElem *) node;
+
+ APP_JUMB(ie->infercollid);
+ APP_JUMB(ie->inferopfamily);
+ APP_JUMB(ie->inferopcinputtype);
+ JumbleExpr(jstate, ie->expr);
+ }
+ break;
case T_TargetEntry:
{
TargetEntry *tle = (TargetEntry *) node;
@@ -2667,6 +2678,20 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
JumbleExpr(jstate, from->quals);
}
break;
+ case T_OnConflictExpr:
+ {
+ OnConflictExpr *conf = (OnConflictExpr *) node;
+
+ APP_JUMB(conf->action);
+ JumbleExpr(jstate, (Node *) conf->arbiterElems);
+ JumbleExpr(jstate, conf->arbiterWhere);
+ JumbleExpr(jstate, (Node *) conf->onConflictSet);
+ JumbleExpr(jstate, conf->onConflictWhere);
+ APP_JUMB(conf->constraint);
+ APP_JUMB(conf->exclRelIndex);
+ JumbleExpr(jstate, (Node *) conf->exclRelTlist);
+ }
+ break;
case T_List:
foreach(temp, (List *) node)
{
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 94fab18c42..81cb2b447d 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -847,8 +847,8 @@ appendWhereClause(StringInfo buf,
void
deparseInsertSql(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
- List *targetAttrs, List *returningList,
- List **retrieved_attrs)
+ List *targetAttrs, bool doNothing,
+ List *returningList, List **retrieved_attrs)
{
AttrNumber pindex;
bool first;
@@ -892,6 +892,9 @@ deparseInsertSql(StringInfo buf, PlannerInfo *root,
else
appendStringInfoString(buf, " DEFAULT VALUES");
+ if (doNothing)
+ appendStringInfoString(buf, " ON CONFLICT DO NOTHING");
+
deparseReturningList(buf, root, rtindex, rel,
rel->trigdesc && rel->trigdesc->trig_insert_after_row,
returningList, retrieved_attrs);
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 93e9836cf0..1f417b30be 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2327,6 +2327,11 @@ INSERT INTO ft1(c1, c2) VALUES(11, 12); -- duplicate key
ERROR: duplicate key value violates unique constraint "t1_pkey"
DETAIL: Key ("C 1")=(11) already exists.
CONTEXT: Remote SQL command: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT DO NOTHING; -- works
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) DO NOTHING; -- unsupported
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) DO UPDATE SET c3 = 'ffg'; -- unsupported
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
INSERT INTO ft1(c1, c2) VALUES(1111, -2); -- c2positive
ERROR: new row for relation "T 1" violates check constraint "c2positive"
DETAIL: Failing row contains (1111, -2, null, null, null, null, ft1 , null).
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index de732319d7..173b4f06e6 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -1171,6 +1171,7 @@ postgresPlanForeignModify(PlannerInfo *root,
List *targetAttrs = NIL;
List *returningList = NIL;
List *retrieved_attrs = NIL;
+ bool doNothing = false;
initStringInfo(&sql);
@@ -1222,6 +1223,18 @@ postgresPlanForeignModify(PlannerInfo *root,
if (plan->returningLists)
returningList = (List *) list_nth(plan->returningLists, subplan_index);
+ /*
+ * ON CONFLICT DO UPDATE and DO NOTHING case with inference specification
+ * should have already been rejected in the optimizer, as presently there
+ * is no way to recognize an arbiter index on a foreign table. Only DO
+ * NOTHING is supported without an inference specification.
+ */
+ if (plan->onConflictAction == ONCONFLICT_NOTHING)
+ doNothing = true;
+ else if (plan->onConflictAction != ONCONFLICT_NONE)
+ elog(ERROR, "unexpected ON CONFLICT specification: %d",
+ (int) plan->onConflictAction);
+
/*
* Construct the SQL command string.
*/
@@ -1229,7 +1242,7 @@ postgresPlanForeignModify(PlannerInfo *root,
{
case CMD_INSERT:
deparseInsertSql(&sql, root, resultRelation, rel,
- targetAttrs, returningList,
+ targetAttrs, doNothing, returningList,
&retrieved_attrs);
break;
case CMD_UPDATE:
diff --git a/contrib/postgres_fdw/postgres_fdw.h b/contrib/postgres_fdw/postgres_fdw.h
index 950c6f79a2..3835ddb79a 100644
--- a/contrib/postgres_fdw/postgres_fdw.h
+++ b/contrib/postgres_fdw/postgres_fdw.h
@@ -60,7 +60,7 @@ extern void appendWhereClause(StringInfo buf,
List **params);
extern void deparseInsertSql(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
- List *targetAttrs, List *returningList,
+ List *targetAttrs, bool doNothing, List *returningList,
List **retrieved_attrs);
extern void deparseUpdateSql(StringInfo buf, PlannerInfo *root,
Index rtindex, Relation rel,
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 4a23457e79..fcdd92e280 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -372,6 +372,9 @@ UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *;
ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0);
INSERT INTO ft1(c1, c2) VALUES(11, 12); -- duplicate key
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT DO NOTHING; -- works
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) DO NOTHING; -- unsupported
+INSERT INTO ft1(c1, c2) VALUES(11, 12) ON CONFLICT (c1, c2) DO UPDATE SET c3 = 'ffg'; -- unsupported
INSERT INTO ft1(c1, c2) VALUES(1111, -2); -- c2positive
UPDATE ft1 SET c2 = -c2 WHERE c1 = 1; -- c2positive
diff --git a/contrib/test_decoding/expected/ddl.out b/contrib/test_decoding/expected/ddl.out
index 2041ba80b5..463cb5efb9 100644
--- a/contrib/test_decoding/expected/ddl.out
+++ b/contrib/test_decoding/expected/ddl.out
@@ -148,6 +148,24 @@ SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'inc
COMMIT
(9 rows)
+-- ON CONFLICT DO UPDATE support
+BEGIN;
+INSERT INTO replication_example(id, somedata, somenum) SELECT i, i, i FROM generate_series(-15, 15) i
+ ON CONFLICT (id) DO UPDATE SET somenum = excluded.somenum + 1;
+COMMIT;
+/* display results, but hide most of the output */
+SELECT count(*), min(data), max(data)
+FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
+GROUP BY substring(data, 1, 40)
+ORDER BY 1,2;
+ count | min | max
+-------+----------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------
+ 1 | BEGIN | BEGIN
+ 1 | COMMIT | COMMIT
+ 15 | table public.replication_example: UPDATE: id[integer]:10 somedata[integer]:4 somenum[integer]:11 zaphod1[integer]:null zaphod2[integer]:null | table public.replication_example: UPDATE: id[integer]:9 somedata[integer]:3 somenum[integer]:10 zaphod1[integer]:null zaphod2[integer]:null
+ 16 | table public.replication_example: INSERT: id[integer]:0 somedata[integer]:0 somenum[integer]:0 zaphod1[integer]:null zaphod2[integer]:null | table public.replication_example: INSERT: id[integer]:-9 somedata[integer]:-9 somenum[integer]:-9 zaphod1[integer]:null zaphod2[integer]:null
+(4 rows)
+
-- hide changes bc of oid visible in full table rewrites
CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int);
INSERT INTO tr_unique(data) VALUES(10);
@@ -196,6 +214,22 @@ ORDER BY 1,2;
20467 | table public.tr_etoomuch: DELETE: id[integer]:1 | table public.tr_etoomuch: UPDATE: id[integer]:9999 data[integer]:-9999
(3 rows)
+-- check that a large, spooled, upsert works
+INSERT INTO tr_etoomuch (id, data)
+SELECT g.i, -g.i FROM generate_series(8000, 12000) g(i)
+ON CONFLICT(id) DO UPDATE SET data = EXCLUDED.data;
+SELECT substring(data, 1, 29), count(*)
+FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
+GROUP BY 1
+ORDER BY min(location - '0/0');
+ substring | count
+-------------------------------+-------
+ BEGIN | 1
+ table public.tr_etoomuch: UPD | 2235
+ table public.tr_etoomuch: INS | 1766
+ COMMIT | 1
+(4 rows)
+
/*
* check whether we decode subtransactions correctly in relation with each
* other
diff --git a/contrib/test_decoding/expected/toast.out b/contrib/test_decoding/expected/toast.out
index 0a850b7acd..735b14c978 100644
--- a/contrib/test_decoding/expected/toast.out
+++ b/contrib/test_decoding/expected/toast.out
@@ -23,6 +23,10 @@ INSERT INTO xpto (toasted_col2) SELECT repeat(string_agg(to_char(g.i, 'FM0000'),
-- update of existing column
UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_series(1, 2000) g(i)) WHERE id = 1;
UPDATE xpto SET rand1 = 123.456 WHERE id = 1;
+-- updating external via INSERT ... ON CONFLICT DO UPDATE
+INSERT INTO xpto(id, toasted_col2) VALUES (2, 'toasted2-upsert')
+ON CONFLICT (id)
+DO UPDATE SET toasted_col2 = EXCLUDED.toasted_col2 || xpto.toasted_col2;
DELETE FROM xpto WHERE id = 1;
DROP TABLE IF EXISTS toasted_key;
NOTICE: table "toasted_key" does not exist, skipping
@@ -64,6 +68,9 @@ SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot',
table public.xpto: UPDATE: id[integer]:1 toasted_col1[text]:unchanged-toast-datum rand1[double precision]:123.456 toasted_col2[text]:unchanged-toast-datum rand2[double precision]:1578
COMMIT
BEGIN
+ table public.xpto: UPDATE: id[integer]:2 toasted_col1[text]:null rand1[double precision]:3077 toasted_col2[text]:'toasted2-upsert00010002000300040005000600070008000900100011001200130014001500160017001
+ COMMIT
+ BEGIN
table public.xpto: DELETE: id[integer]:1
COMMIT
BEGIN
@@ -283,7 +290,7 @@ SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot',
table public.toasted_copy: INSERT: id[integer]:202 data[text]:'untoasted199'
table public.toasted_copy: INSERT: id[integer]:203 data[text]:'untoasted200'
COMMIT
-(232 rows)
+(235 rows)
SELECT pg_drop_replication_slot('regression_slot');
pg_drop_replication_slot
diff --git a/contrib/test_decoding/sql/ddl.sql b/contrib/test_decoding/sql/ddl.sql
index 03314d18ac..6baad9267a 100644
--- a/contrib/test_decoding/sql/ddl.sql
+++ b/contrib/test_decoding/sql/ddl.sql
@@ -84,6 +84,18 @@ COMMIT;
-- show changes
SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+-- ON CONFLICT DO UPDATE support
+BEGIN;
+INSERT INTO replication_example(id, somedata, somenum) SELECT i, i, i FROM generate_series(-15, 15) i
+ ON CONFLICT (id) DO UPDATE SET somenum = excluded.somenum + 1;
+COMMIT;
+
+/* display results, but hide most of the output */
+SELECT count(*), min(data), max(data)
+FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
+GROUP BY substring(data, 1, 40)
+ORDER BY 1,2;
+
-- hide changes bc of oid visible in full table rewrites
CREATE TABLE tr_unique(id2 serial unique NOT NULL, data int);
INSERT INTO tr_unique(data) VALUES(10);
@@ -114,6 +126,16 @@ FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids',
GROUP BY substring(data, 1, 24)
ORDER BY 1,2;
+-- check that a large, spooled, upsert works
+INSERT INTO tr_etoomuch (id, data)
+SELECT g.i, -g.i FROM generate_series(8000, 12000) g(i)
+ON CONFLICT(id) DO UPDATE SET data = EXCLUDED.data;
+
+SELECT substring(data, 1, 29), count(*)
+FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1')
+GROUP BY 1
+ORDER BY min(location - '0/0');
+
/*
* check whether we decode subtransactions correctly in relation with each
* other
diff --git a/contrib/test_decoding/sql/toast.sql b/contrib/test_decoding/sql/toast.sql
index 09293865df..26d6b4fbdd 100644
--- a/contrib/test_decoding/sql/toast.sql
+++ b/contrib/test_decoding/sql/toast.sql
@@ -25,6 +25,11 @@ UPDATE xpto SET toasted_col1 = (SELECT string_agg(g.i::text, '') FROM generate_s
UPDATE xpto SET rand1 = 123.456 WHERE id = 1;
+-- updating external via INSERT ... ON CONFLICT DO UPDATE
+INSERT INTO xpto(id, toasted_col2) VALUES (2, 'toasted2-upsert')
+ON CONFLICT (id)
+DO UPDATE SET toasted_col2 = EXCLUDED.toasted_col2 || xpto.toasted_col2;
+
DELETE FROM xpto WHERE id = 1;
DROP TABLE IF EXISTS toasted_key;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 04f3c22433..bc06d2cbb2 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1050,6 +1050,13 @@ GetForeignServerByName(const char *name, bool missing_ok);
source provides.
+
+ INSERT> with an ON CONFLICT> clause does not
+ support specifying the conflict target, as remote constraints are not
+ locally known. This in turn implies that ON CONFLICT DO
+ UPDATE> is not supported, since the specification is mandatory there.
+
+
diff --git a/doc/src/sgml/keywords.sgml b/doc/src/sgml/keywords.sgml
index b0dfd5ff75..ea582116ab 100644
--- a/doc/src/sgml/keywords.sgml
+++ b/doc/src/sgml/keywords.sgml
@@ -853,6 +853,13 @@
+
+ CONFLICT
+ non-reserved
+
+
+
+ CONNECT
diff --git a/doc/src/sgml/mvcc.sgml b/doc/src/sgml/mvcc.sgml
index f88b16e778..313198800c 100644
--- a/doc/src/sgml/mvcc.sgml
+++ b/doc/src/sgml/mvcc.sgml
@@ -326,8 +326,27 @@
- Because of the above rule, it is possible for an updating command to see an
- inconsistent snapshot: it can see the effects of concurrent updating
+ INSERT with an ON CONFLICT DO UPDATE> clause
+ behaves similarly. In Read Committed mode, each row proposed for insertion
+ will either insert or update. Unless there are unrelated errors, one of
+ those two outcomes is guaranteed. If a conflict originates in another
+ transaction whose effects are not yet visible to the INSERT
+ , the UPDATE clause will affect that row,
+ even though possibly no> version of that row is
+ conventionally visible to the command.
+
+
+
+ INSERT with an ON CONFLICT DO
+ NOTHING> clause may have insertion not proceed for a row due to
+ the outcome of another transaction whose effects are not visible
+ to the INSERT snapshot. Again, this is only
+ the case in Read Committed mode.
+
+
+
+ Because of the above rules, it is possible for an updating command to see
+ an inconsistent snapshot: it can see the effects of concurrent updating
commands on the same rows it is trying to update, but it
does not see effects of those commands on other rows in the database.
This behavior makes Read Committed mode unsuitable for commands that
diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml
index d36acf6d99..9a7763d18c 100644
--- a/doc/src/sgml/plpgsql.sgml
+++ b/doc/src/sgml/plpgsql.sgml
@@ -2623,7 +2623,11 @@ END;
This example uses exception handling to perform either
- UPDATE> or INSERT>, as appropriate:
+ UPDATE> or INSERT>, as appropriate. It is
+ recommended that applications use INSERT> with
+ ON CONFLICT DO UPDATE> rather than actually using
+ this pattern. This example serves primarily to illustrate use of
+ PL/pgSQL control flow structures:
CREATE TABLE db (a INT PRIMARY KEY, b TEXT);
@@ -3852,9 +3856,11 @@ ASSERT condition , INSERT> and UPDATE> operations, the return value
should be NEW>, which the trigger function may modify to
support INSERT RETURNING> and UPDATE RETURNING>
- (this will also affect the row value passed to any subsequent triggers).
- For DELETE> operations, the return value should be
- OLD>.
+ (this will also affect the row value passed to any subsequent triggers,
+ or passed to a special EXCLUDED> alias reference within
+ an INSERT> statement with an ON CONFLICT DO
+ UPDATE> clause). For DELETE> operations, the return
+ value should be OLD>.
diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml
index 43adb61455..1079140de2 100644
--- a/doc/src/sgml/postgres-fdw.sgml
+++ b/doc/src/sgml/postgres-fdw.sgml
@@ -68,6 +68,14 @@
in your user mapping must have privileges to do these things.)
+
+ Note that postgres_fdw> currently lacks support for
+ INSERT statements with an ON CONFLICT DO
+ UPDATE> clause. However, the ON CONFLICT DO NOTHING>
+ clause is supported, provided a unique index inference specification
+ is omitted.
+
+
It is generally recommended that the columns of a foreign table be declared
with exactly the same data types, and collations if applicable, as the
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 3a753a0b9b..ac13d3201c 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -2998,9 +2998,16 @@ CommandComplete (B)
INSERT oidrows, where
rows is the number of rows
- inserted. oid is the object ID
- of the inserted row if rows is 1
- and the target table has OIDs;
+ inserted. However, if and only if ON CONFLICT
+ UPDATE> is specified, then the tag is UPSERT
+ oid
+ rows, where
+ rows is the number of rows inserted
+ or updated.
+ oid is the object ID of the
+ inserted row if rows is 1 and the
+ target table has OIDs, and (for the UPSERT
+ tag), the row was actually inserted rather than updated;
otherwise oid is 0.
diff --git a/doc/src/sgml/ref/create_policy.sgml b/doc/src/sgml/ref/create_policy.sgml
index 457911e0c3..e826984633 100644
--- a/doc/src/sgml/ref/create_policy.sgml
+++ b/doc/src/sgml/ref/create_policy.sgml
@@ -78,11 +78,13 @@ CREATE POLICY name ON ON CONFLICT DO
+ UPDATE> and INSERT> policies are not combined in this way, but
+ rather enforced as noted at each stage of ON CONFLICT> execution).
+ Further, for commands which can have both USING and WITH CHECK policies (ALL
+ and UPDATE), if no WITH CHECK policy is defined then the USING policy will be
+ used for both what rows are visible (normal USING case) and which rows will
+ be allowed to be added (WITH CHECK case).
@@ -263,6 +265,12 @@ CREATE POLICY name ON
+
+ Note that INSERT with ON CONFLICT DO
+ UPDATE requires that any INSERT policy
+ WITH CHECK expression passes for any rows appended to the relation by
+ the INSERT path only.
+
@@ -271,22 +279,39 @@ CREATE POLICY name ON
Using UPDATE for a policy means that it will apply
- to UPDATE commands. As UPDATE
- involves pulling an existing record and then making changes to some
- portion (but possibly not all) of the record, the
- UPDATE policy accepts both a USING expression and
- a WITH CHECK expression. The USING expression will be used to
- determine which records the UPDATE command will
- see to operate against, while the WITH CHECK
- expression defines what rows are allowed to be added back into the
- relation (similar to the INSERT policy).
- Any rows whose resulting values do not pass the
- WITH CHECK expression will cause an ERROR and the
- entire command will be aborted. Note that if only a
- USING clause is specified then that clause will be
- used for both USING and
+ to UPDATE commands (or auxiliary ON
+ CONFLICT DO UPDATE clauses of INSERT
+ commands). As UPDATE involves pulling an existing
+ record and then making changes to some portion (but possibly not all)
+ of the record, the UPDATE policy accepts both a
+ USING expression and a WITH CHECK
+ expression. The USING expression will be used to
+ determine which records the UPDATE command will see
+ to operate against, while the WITH CHECK expression
+ defines what rows are allowed to be added back into the relation
+ (similar to the INSERT policy). Any rows whose
+ resulting values do not pass the WITH CHECK
+ expression will cause an ERROR and the entire command will be aborted.
+ Note that if only a USING clause is specified then
+ that clause will be used for both USING and
WITH CHECK cases.
+
+ Note, however, that INSERT with ON CONFLICT
+ DO UPDATE requires that an UPDATE policy
+ USING expression always be enforced as a
+ WITH CHECK expression. This
+ UPDATE policy must always pass when the
+ UPDATE path is taken. Any existing row that
+ necessitates that the UPDATE path be taken must pass
+ the (UPDATE or ALL) USING qualifications (combined
+ using OR), which are always enforced as WTIH CHECK
+ options in this context (the UPDATE path will
+ never> be silently avoided; an error will be thrown
+ instead). Finally, the final row appended to the relation must pass
+ any WITH CHECK options that a conventional
+ UPDATE is required to pass.
+
diff --git a/doc/src/sgml/ref/create_rule.sgml b/doc/src/sgml/ref/create_rule.sgml
index 677766a2d5..53fdf56621 100644
--- a/doc/src/sgml/ref/create_rule.sgml
+++ b/doc/src/sgml/ref/create_rule.sgml
@@ -136,7 +136,11 @@ CREATE [ OR REPLACE ] RULE name AS
The event is one of SELECT,
INSERT, UPDATE, or
- DELETE.
+ DELETE. Note that an
+ INSERT containing an ON
+ CONFLICT clause cannot be used on tables that have
+ either INSERT or UPDATE
+ rules. Consider using an updatable view instead.
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index be7ebd5f54..fac7e1ec5e 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -717,7 +717,9 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
EXCLUDE>, and
REFERENCES> (foreign key) constraints accept this
clause. NOT NULL> and CHECK> constraints are not
- deferrable.
+ deferrable. Note that deferrable constraints cannot be used as
+ conflict arbitrators in an INSERT statement that
+ includes an ON CONFLICT DO UPDATE> clause.
diff --git a/doc/src/sgml/ref/create_trigger.sgml b/doc/src/sgml/ref/create_trigger.sgml
index aae0b41cd2..4bde815012 100644
--- a/doc/src/sgml/ref/create_trigger.sgml
+++ b/doc/src/sgml/ref/create_trigger.sgml
@@ -76,7 +76,10 @@ CREATE [ CONSTRAINT ] TRIGGER name
executes once for any given operation, regardless of how many rows
it modifies (in particular, an operation that modifies zero rows
will still result in the execution of any applicable FOR
- EACH STATEMENT triggers).
+ EACH STATEMENT triggers). Note that with an
+ INSERT with an ON CONFLICT DO UPDATE>
+ clause, both INSERT and
+ UPDATE statement level trigger will be fired.
diff --git a/doc/src/sgml/ref/create_view.sgml b/doc/src/sgml/ref/create_view.sgml
index 5dadab1dee..8fa3564021 100644
--- a/doc/src/sgml/ref/create_view.sgml
+++ b/doc/src/sgml/ref/create_view.sgml
@@ -333,7 +333,8 @@ CREATE VIEW vista AS SELECT text 'Hello World' AS hello;
If the view is automatically updatable the system will convert any
INSERT>, UPDATE> or DELETE> statement
on the view into the corresponding statement on the underlying base
- relation.
+ relation. INSERT> statements that have an ON
+ CONFLICT UPDATE> clause are fully supported.
@@ -345,8 +346,10 @@ CREATE VIEW vista AS SELECT text 'Hello World' AS hello;
condition, and thus is no longer visible through the view. Similarly,
an INSERT> command can potentially insert base-relation rows
that do not satisfy the WHERE> condition and thus are not
- visible through the view. The CHECK OPTION> may be used to
- prevent INSERT> and UPDATE> commands from creating
+ visible through the view (ON CONFLICT UPDATE> may
+ similarly affect an existing row not visible through the view).
+ The CHECK OPTION> may be used to prevent
+ INSERT> and UPDATE> commands from creating
such rows that are not visible through the view.
diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml
index a3cccb9f7c..c88d1b7b50 100644
--- a/doc/src/sgml/ref/insert.sgml
+++ b/doc/src/sgml/ref/insert.sgml
@@ -22,9 +22,24 @@ PostgreSQL documentation
[ WITH [ RECURSIVE ] with_query [, ...] ]
-INSERT INTO table_name [ ( column_name [, ...] ) ]
+INSERT INTO table_name [ AS alias ] [ ( column_name [, ...] ) ]
{ DEFAULT VALUES | VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query }
+ [ ON CONFLICT [ conflict_target ] conflict_action ]
[ RETURNING * | output_expression [ [ AS ] output_name ] [, ...] ]
+
+where conflict_target can be one of:
+
+ ( { column_name_index | ( expression_index ) } [ COLLATE collation ] [ opclass ] [, ...] ) [ WHERE index_predicate ]
+ ON CONSTRAINT constraint_name
+
+and conflict_action is one of:
+
+ DO NOTHING
+ DO UPDATE SET { column_name = { expression | DEFAULT } |
+ ( column_name [, ...] ) = ( { expression | DEFAULT } [, ...] ) |
+ ( column_name [, ...] ) = ( sub-SELECT )
+ } [, ...]
+ [ WHERE condition ]
@@ -58,20 +73,47 @@ INSERT INTO table_name [ (
+
+ ON CONFLICT> can be used to specify an alternative
+ action to raising a unique constraint or exclusion constraint
+ violation error . (See below.)
+
+
The optional RETURNING> clause causes INSERT>
- to compute and return value(s) based on each row actually inserted.
- This is primarily useful for obtaining values that were supplied by
- defaults, such as a serial sequence number. However, any expression
- using the table's columns is allowed. The syntax of the
- RETURNING> list is identical to that of the output list
- of SELECT>.
+ to compute and return value(s) based on each row actually inserted
+ (or updated, if an ON CONFLICT DO UPDATE> clause was
+ used). This is primarily useful for obtaining values that were
+ supplied by defaults, such as a serial sequence number. However,
+ any expression using the table's columns is allowed. The syntax of
+ the RETURNING> list is identical to that of the output
+ list of SELECT>. Only rows that were successfully
+ inserted or updated will be returned. For example, if a row was
+ locked but not updated because an ON CONFLICT DO UPDATE
+ ... WHERE clause condition was not satisfied, the
+ row will not be returned.
You must have INSERT privilege on a table in
- order to insert into it. If a column list is specified, you only
- need INSERT privilege on the listed columns.
+ order to insert into it. If ON CONFLICT DO UPDATE> is
+ present the UPDATE privilege is also required.
+
+
+
+ If a column list is specified, you only need
+ INSERT privilege on the listed columns.
+ Similarly, when ON CONFLICT DO UPDATE> is specified, you
+ only need UPDATE> privilege on the column(s) that are
+ listed to be updated. However, ON CONFLICT DO UPDATE>
+ also requires SELECT> privilege on any column whose
+ values are read in the ON CONFLICT DO UPDATE>
+ expressions or condition>.
+
+
+
Use of the RETURNING> clause requires SELECT>
privilege on all columns mentioned in RETURNING>.
If you use the table_name [ (
+
+ alias
+
+
+ A substitute name for the target table. When an alias is provided, it
+ completely hides the actual name of the table. This is particularly
+ useful when using ON CONFLICT DO UPDATE into a table
+ named excluded as that's also the name of the
+ pseudo-relation containing the proposed row.
+
+
+
+
+
column_name
@@ -121,7 +177,12 @@ INSERT INTO table_name [ ( table_name.
The column name can be qualified with a subfield name or array
subscript, if needed. (Inserting into only some fields of a
- composite column leaves the other fields null.)
+ composite column leaves the other fields null.) When
+ referencing a column with ON CONFLICT DO UPDATE>, do
+ not include the table's name in the specification of a target
+ column. For example, INSERT ... ON CONFLICT DO UPDATE
+ tab SET table_name.col = 1> is invalid (this follows the general
+ behavior for UPDATE>).
@@ -171,13 +232,34 @@ INSERT INTO table_name [ (
An expression to be computed and returned by the INSERT>
- command after each row is inserted. The expression can use any
- column names of the table named by table_name.
+ command after each row is inserted (not updated). The
+ expression can use any column names of the table named by
+ table_name.
Write *> to return all columns of the inserted row(s).
+
+ conflict_target
+
+
+ Specify which conflicts ON CONFLICT refers to.
+
+
+
+
+
+ conflict_action
+
+
+ DO NOTHING or DO UPDATE
+ SET clause specifying the action to be performed in
+ case of a conflict.
+
+
+
+
output_name
@@ -186,9 +268,226 @@ INSERT INTO table_name [ (
+
+
+ column_name_index
+
+
+ The name of a table_name column. Part of a
+ unique index inference clause. Follows CREATE
+ INDEX format. SELECT> privilege on
+ column_name_index
+ is required.
+
+
+
+
+
+ expression_index
+
+
+ Similar to column_name_index, but used to
+ infer expressions on table_name columns appearing
+ within index definitions (not simple columns). Part of unique
+ index inference clause. Follows CREATE INDEX
+ format. SELECT> privilege on any column appearing
+ within expression_index is required.
+
+
+
+
+
+ collation
+
+
+ When specified, mandates that corresponding column_name_index or
+ expression_index use a
+ particular collation in order to be matched in the inference clause.
+ Typically this is omitted, as collations usually do not affect wether or
+ not a constraint violation occurs. Follows CREATE
+ INDEX format.
+
+
+
+
+
+ opclass
+
+
+ When specified, mandates that corresponding column_name_index or
+ expression_index use
+ particular operator class in order to be matched by the inference
+ clause. Sometimes this is omitted because the
+ equality semantics are often equivalent across a
+ type's operator classes anyway, or because it's sufficient to trust that
+ the defined unique indexes have the pertinent definition of equality.
+ Follows CREATE INDEX format.
+
+
+
+
+
+ index_predicate
+
+
+ Used to allow inference of partial unique indexes. Any indexes
+ that satisfy the predicate (which need not actually be partial
+ indexes) can be matched by the rest of the inference clause.
+ Follows CREATE INDEX format.
+ SELECT> privilege on any column appearing within
+ index_predicate is
+ required.
+
+
+
+
+
+ constraint_name
+
+
+ Explicitly specifies an arbiter constraint
+ by name, rather than inferring a constraint or index. This is
+ mostly useful for exclusion constraints, that cannot be chosen
+ in the conventional way (with an inference clause).
+
+
+
+
+
+ condition
+
+
+ An expression that returns a value of type boolean. Only
+ rows for which this expression returns true will be
+ updated, although all rows will be locked when the
+ ON CONFLICT DO UPDATE> action is taken.
+
+
+
+
+ ON CONFLICT Clause
+
+ UPSERT
+
+
+ ON CONFLICT
+
+
+ The optional ON CONFLICT clause specifies an
+ alternative action to raising a unique violation or exclusion
+ constraint violation error. For each individual row proposed for
+ insertion, either the insertion proceeds, or, if a constraint
+ specified by the conflict_target is
+ violated, the alternative conflict_action is
+ taken.
+
+
+
+ conflict_target describes which conflicts
+ are handled by the ON CONFLICT clause. Either a
+ unique index inference clause or an explicitly
+ named constraint can be used. For ON CONFLICT DO
+ NOTHING, it is optional to specify a
+ conflict_target; when ommitted, conflicts
+ with all usable constraints (and unique indexes) are handled. For
+ ON CONFLICT DO UPDATE, a conflict target
+ must be specified.
+
+ Every time an insertion without ON CONFLICT
+ would ordinarily raise an error due to violating one of the
+ inferred (or explicitly named) constraints, a conflict (as in
+ ON CONFLICT) occurs, and the alternative action,
+ as specified by conflict_action is taken.
+ This happens on a row-by-row basis.
+
+
+
+ A unique index inference clause consists of
+ one or more column_name_index columns and/or
+ expression_index
+ expressions, and a optional
+ index_predicate.
+
+
+
+ All the table_name
+ unique indexes that, without regard to order, contain exactly the
+ specified columns/expressions and, if specified, whose predicate
+ implies the
+ index_predicate are chosen as arbiter indexes. Note
+ that this means an index without a predicate will be used if a
+ non-partial index matching every other criteria happens to be
+ available.
+
+
+
+ If no index matches the inference clause (nor is there a constraint
+ explicitly named), an error is raised. Deferred constraints are
+ not supported as arbiters.
+
+
+
+ conflict_action defines the action to be
+ taken in case of conflict. ON CONFLICT DO
+ NOTHING simply avoids inserting a row as its alternative
+ action. ON CONFLICT DO UPDATE updates the
+ existing row that conflicts with the row proposed for insertion as
+ its alternative action.
+
+ ON CONFLICT DO UPDATE guarantees an atomic
+ INSERT or UPDATE outcome - provided
+ there is no independent error, one of those two outcomes is guaranteed,
+ even under high concurrency. This feature is also known as
+ UPSERT.
+
+ Note that exclusion constraints are not supported with
+ ON CONFLICT DO UPDATE.
+
+
+
+ ON CONFLICT DO UPDATE optionally accepts
+ a WHERE clause condition.
+ When provided, the statement only proceeds with updating if
+ the condition is satisfied. Otherwise, unlike a
+ conventional UPDATE, the row is still locked for update.
+ Note that the condition is evaluated last, after
+ a conflict has been identified as a candidate to update.
+
+
+
+ The SET and WHERE clauses in
+ ON CONFLICT UPDATE have access to the existing
+ row, using the table's name, and to the row
+ proposed for insertion, using the excluded
+ alias. The excluded alias requires
+ SELECT> privilege on any column whose values are read.
+
+ Note that the effects of all per-row BEFORE INSERT
+ triggers are reflected in excluded values, since those
+ effects may have contributed to the row being excluded from insertion.
+
+
+
+ INSERT with an ON CONFLICT DO UPDATE>
+ clause is a deterministic statement. This means
+ that the command will not be allowed to affect any single existing
+ row more than once; a cardinality violation error will be raised
+ when this situation arises. Rows proposed for insertion should not
+ duplicate each other in terms of attributes constrained by the
+ conflict-arbitrating unique index.
+
+
+
Outputs
@@ -197,21 +496,30 @@ INSERT INTO table_name [ (
INSERT oidcount
+
+ However, in the event of an ON CONFLICT DO UPDATE> clause
+ (but not in the event of an ON
+ CONFLICT DO NOTHING> clause), the command tag reports the number of
+ rows inserted or updated together, of the form
+
+UPSERT oidcount
The count is the number
of rows inserted. If count
is exactly one, and the target table has OIDs, then
oid is the
- OID assigned to the inserted row. Otherwise
- oid is zero.
+ OID
+ assigned to the inserted row (but not if there is only a single
+ updated row). Otherwise oid is zero.
If the INSERT> command contains a RETURNING>
clause, the result will be similar to that of a SELECT>
statement containing the columns and values defined in the
- RETURNING> list, computed over the row(s) inserted by the
- command.
+ RETURNING> list, computed over the row(s) inserted or
+ updated by the command.
@@ -311,7 +619,65 @@ WITH upd AS (
RETURNING *
)
INSERT INTO employees_log SELECT *, current_timestamp FROM upd;
-
+
+
+
+ Insert or update new distributors as appropriate. Assumes a unique
+ index has been defined that constrains values appearing in the
+ did column. Note that an EXCLUDED>
+ expression is used to reference values originally proposed for
+ insertion:
+
+ INSERT INTO distributors (did, dname)
+ VALUES (5, 'Gizmo transglobal'), (6, 'Associated Computing, inc')
+ ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;
+
+
+
+ Insert a distributor, or do nothing for rows proposed for insertion
+ when an existing, excluded row (a row with a matching constrained
+ column or columns after before row insert triggers fire) exists.
+ Example assumes a unique index has been defined that constrains
+ values appearing in the did column:
+
+ INSERT INTO distributors (did, dname) VALUES (7, 'Redline GmbH')
+ ON CONFLICT (did) DO NOTHING;
+
+
+
+ Insert or update new distributors as appropriate. Example assumes
+ a unique index has been defined that constrains values appearing in
+ the did column. WHERE> clause is
+ used to limit the rows actually updated (any existing row not
+ updated will still be locked, though):
+
+ -- Don't update existing distributors based in a certain ZIP code
+ INSERT INTO distributors AS d (did, dname) VALUES (8, 'Anvil Distribution')
+ ON CONFLICT (did) DO UPDATE
+ SET dname = EXCLUDED.dname || ' (formerly ' || d.dname || ')'
+ WHERE d.zipcode != '21201';
+
+ -- Name a constraint directly in the statement (uses associated
+ -- index to arbitrate taking the DO NOTHING action)
+ INSERT INTO distributors (did, dname) VALUES (9, 'Antwerp Design')
+ ON CONFLICT ON CONSTRAINT distributors_pkey DO NOTHING;
+
+
+
+ Insert new distributor if possible; otherwise
+ DO NOTHING. Example assumes a unique index has been
+ defined that constrains values appearing in the
+ did column on a subset of rows where the
+ is_active boolean column evaluates to
+ true:
+
+ -- This statement could infer a partial unique index on "did"
+ -- with a predicate of "WHERE is_active", but it could also
+ -- just use a regular unique constraint on "did"
+ INSERT INTO distributors (did, dname) VALUES (10, 'Conrad International')
+ ON CONFLICT (did) WHERE is_active DO NOTHING;
+
+
@@ -321,7 +687,8 @@ INSERT INTO employees_log SELECT *, current_timestamp FROM upd;
INSERT conforms to the SQL standard, except that
the RETURNING> clause is a
PostgreSQL extension, as is the ability
- to use WITH> with INSERT>.
+ to use WITH> with INSERT>, and the ability to
+ specify an alternative action with ON CONFLICT>.
Also, the case in
which a column name list is omitted, but not all the columns are
filled from the VALUES> clause or query>,
diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml
index f94aea174a..bd0d71e0d9 100644
--- a/doc/src/sgml/trigger.sgml
+++ b/doc/src/sgml/trigger.sgml
@@ -40,14 +40,17 @@
On tables and foreign tables, triggers can be defined to execute either
before or after any INSERT, UPDATE,
or DELETE operation, either once per modified row,
- or once per SQL statement.
- UPDATE triggers can moreover be set to fire only if
- certain columns are mentioned in the SET clause of the
- UPDATE statement.
- Triggers can also fire for TRUNCATE statements.
- If a trigger event occurs, the trigger's function is called at the
- appropriate time to handle the event. Foreign tables do not support the
- TRUNCATE statement at all.
+ or once per SQL statement. If an
+ INSERT contains an ON CONFLICT DO UPDATE>
+ clause, it is possible that the effects of a BEFORE insert trigger and
+ a BEFORE update trigger can both be applied together, if a reference to
+ an EXCLUDED> column appears. UPDATE
+ triggers can moreover be set to fire only if certain columns are
+ mentioned in the SET clause of the
+ UPDATE statement. Triggers can also fire for
+ TRUNCATE statements. If a trigger event occurs,
+ the trigger's function is called at the appropriate time to handle the
+ event. Foreign tables do not support the TRUNCATE statement at all.
@@ -118,6 +121,35 @@
be operated on.
+
+ If an INSERT contains an ON CONFLICT
+ DO UPDATE> clause, it is possible that the effects of all
+ row-level BEFORE> INSERT triggers
+ and all row-level BEFORE UPDATE triggers can
+ both be applied in a way that is apparent from the final state of
+ the updated row, if an EXCLUDED> column is referenced.
+ There need not be an EXCLUDED> column reference for
+ both sets of BEFORE row-level triggers to execute, though. The
+ possibility of surprising outcomes should be considered when there
+ are both BEFORE> INSERT and
+ BEFORE> UPDATE row-level triggers
+ that both affect a row being inserted/updated (this can still be
+ problematic if the modifications are more or less equivalent if
+ they're not also idempotent). Note that statement-level
+ UPDATE triggers are executed when ON
+ CONFLICT DO UPDATE> is specified, regardless of whether or not
+ any rows were affected by the UPDATE (and
+ regardless of whether the alternative UPDATE
+ path was ever taken). An INSERT with an
+ ON CONFLICT DO UPDATE> clause will execute
+ statement-level BEFORE> INSERT
+ triggers first, then statement-level BEFORE>
+ UPDATE triggers, followed by statement-level
+ AFTER> UPDATE triggers and finally
+ statement-level AFTER> INSERT
+ triggers.
+
+
Trigger functions invoked by per-statement triggers should always
return NULL. Trigger functions invoked by per-row
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e84c1743f4..7ea9a77e7e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2061,8 +2061,17 @@ FreeBulkInsertState(BulkInsertState bistate)
* This causes rows to be frozen, which is an MVCC violation and
* requires explicit options chosen by user.
*
+ * HEAP_INSERT_IS_SPECULATIVE is used on so-called "speculative insertions",
+ * which can be backed out afterwards without aborting the whole transaction.
+ * Other sessions can wait for the speculative insertion to be confirmed,
+ * turning it into a regular tuple, or aborted, as if it never existed.
+ * Speculatively inserted tuples behave as "value locks" of short duration,
+ * used to implement INSERT .. ON CONFLICT.
+ *
* Note that these options will be applied when inserting into the heap's
* TOAST table, too, if the tuple requires any out-of-line data.
+ * FIXME: Do we mark TOAST tuples as speculative too? What about confirming
+ * or aborting them?
*
* The BulkInsertState object (if any; bistate can be NULL for default
* behavior) is also just passed through to RelationGetBufferForTuple.
@@ -2115,7 +2124,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
/* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION();
- RelationPutHeapTuple(relation, buffer, heaptup);
+ RelationPutHeapTuple(relation, buffer, heaptup,
+ (options & HEAP_INSERT_SPECULATIVE) != 0);
if (PageIsAllVisible(BufferGetPage(buffer)))
{
@@ -2169,7 +2179,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
}
xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
- xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ xlrec.flags = 0;
+ if (all_visible_cleared)
+ xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
+ if (options & HEAP_INSERT_SPECULATIVE)
+ xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
/*
@@ -2179,7 +2193,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
*/
if (RelationIsLogicallyLogged(relation))
{
- xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
bufflags |= REGBUF_KEEP_DATA;
}
@@ -2224,6 +2238,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
*/
CacheInvalidateHeapTuple(relation, heaptup, NULL);
+ /* Note: speculative insertions are counted too, even if aborted later */
pgstat_count_heap_insert(relation, 1);
/*
@@ -2395,7 +2410,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
* RelationGetBufferForTuple has ensured that the first tuple fits.
* Put that on the page, and then as many other tuples as fit.
*/
- RelationPutHeapTuple(relation, buffer, heaptuples[ndone]);
+ RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
{
HeapTuple heaptup = heaptuples[ndone + nthispage];
@@ -2403,7 +2418,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
break;
- RelationPutHeapTuple(relation, buffer, heaptup);
+ RelationPutHeapTuple(relation, buffer, heaptup, false);
/*
* We don't use heap_multi_insert for catalog tuples yet, but
@@ -2463,7 +2478,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
/* the rest of the scratch space is used for tuple data */
tupledata = scratchptr;
- xlrec->flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0;
xlrec->ntuples = nthispage;
/*
@@ -2498,7 +2513,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
Assert((scratchptr - scratch) < BLCKSZ);
if (need_tuple_data)
- xlrec->flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
/*
* Signal that this is the last xl_heap_multi_insert record
@@ -2506,7 +2521,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
* decoding so it knows when to cleanup temporary data.
*/
if (ndone + nthispage == ntuples)
- xlrec->flags |= XLOG_HEAP_LAST_MULTI_INSERT;
+ xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
if (init)
{
@@ -2914,7 +2929,12 @@ l1:
MarkBufferDirty(buffer);
- /* XLOG stuff */
+ /*
+ * XLOG stuff
+ *
+ * NB: heap_abort_speculative() uses the same xlog record and replay
+ * routines.
+ */
if (RelationNeedsWAL(relation))
{
xl_heap_delete xlrec;
@@ -2924,7 +2944,7 @@ l1:
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, &tp);
- xlrec.flags = all_visible_cleared ? XLOG_HEAP_ALL_VISIBLE_CLEARED : 0;
+ xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0;
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
@@ -2933,9 +2953,9 @@ l1:
if (old_key_tuple != NULL)
{
if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+ xlrec.flags |= XLH_DELETE_CONTAINS_OLD_TUPLE;
else
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ xlrec.flags |= XLH_DELETE_CONTAINS_OLD_KEY;
}
XLogBeginInsert();
@@ -3742,7 +3762,7 @@ l2:
HeapTupleClearHeapOnly(newtup);
}
- RelationPutHeapTuple(relation, newbuf, heaptup); /* insert new tuple */
+ RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
if (!already_marked)
{
@@ -4133,14 +4153,16 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
*
* Function result may be:
* HeapTupleMayBeUpdated: lock was successfully acquired
+ * HeapTupleInvisible: lock failed because tuple was never visible to us
* HeapTupleSelfUpdated: lock failed because tuple updated by self
* HeapTupleUpdated: lock failed because tuple updated by other xact
* HeapTupleWouldBlock: lock couldn't be acquired and wait_policy is skip
*
- * In the failure cases, the routine fills *hufd with the tuple's t_ctid,
- * t_xmax (resolving a possible MultiXact, if necessary), and t_cmax
- * (the last only for HeapTupleSelfUpdated, since we
- * cannot obtain cmax from a combocid generated by another transaction).
+ * In the failure cases other than HeapTupleInvisible, the routine fills
+ * *hufd with the tuple's t_ctid, t_xmax (resolving a possible MultiXact,
+ * if necessary), and t_cmax (the last only for HeapTupleSelfUpdated,
+ * since we cannot obtain cmax from a combocid generated by another
+ * transaction).
* See comments for struct HeapUpdateFailureData for additional info.
*
* See README.tuplock for a thorough explanation of this mechanism.
@@ -4179,8 +4201,15 @@ l3:
if (result == HeapTupleInvisible)
{
- UnlockReleaseBuffer(*buffer);
- elog(ERROR, "attempted to lock invisible tuple");
+ LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+
+ /*
+ * This is possible, but only when locking a tuple for ON CONFLICT
+ * UPDATE. We return this value here rather than throwing an error in
+ * order to give that case the opportunity to throw a more specific
+ * error.
+ */
+ return HeapTupleInvisible;
}
else if (result == HeapTupleBeingUpdated)
{
@@ -5417,6 +5446,234 @@ heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid,
return HeapTupleMayBeUpdated;
}
+/*
+ * heap_finish_speculative - mark speculative insertion as successful
+ *
+ * To successfully finish a speculative insertion we have to clear speculative
+ * token from tuple. To do so the t_ctid field, which will contain a
+ * speculative token value, is modified in place to point to the tuple itself,
+ * which is characteristic of a newly inserted ordinary tuple.
+ *
+ * NB: It is not ok to commit without either finishing or aborting a
+ * speculative insertion. We could treat speculative tuples of committed
+ * transactions implicitly as completed, but then we would have to be prepared
+ * to deal with speculative tokens on committed tuples. That wouldn't be
+ * difficult - no-one looks at the ctid field of a tuple with invalid xmax -
+ * but clearing the token at completion isn't very expensive either.
+ * An explicit confirmation WAL record also makes logical decoding simpler.
+ */
+void
+heap_finish_speculative(Relation relation, HeapTuple tuple)
+{
+ Buffer buffer;
+ Page page;
+ OffsetNumber offnum;
+ ItemId lp = NULL;
+ HeapTupleHeader htup;
+
+ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ page = (Page) BufferGetPage(buffer);
+
+ offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
+ if (PageGetMaxOffsetNumber(page) >= offnum)
+ lp = PageGetItemId(page, offnum);
+
+ if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ elog(ERROR, "heap_confirm_insert: invalid lp");
+
+ htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+ /* SpecTokenOffsetNumber should be distinguishable from any real offset */
+ StaticAssertStmt(MaxOffsetNumber < SpecTokenOffsetNumber,
+ "invalid speculative token constant");
+
+ /* NO EREPORT(ERROR) from here till changes are logged */
+ START_CRIT_SECTION();
+
+ Assert(HeapTupleHeaderIsSpeculative(tuple->t_data));
+
+ MarkBufferDirty(buffer);
+
+ /*
+ * Replace the speculative insertion token with a real t_ctid,
+ * pointing to itself like it does on regular tuples.
+ */
+ htup->t_ctid = tuple->t_self;
+
+ /* XLOG stuff */
+ if (RelationNeedsWAL(relation))
+ {
+ xl_heap_confirm xlrec;
+ XLogRecPtr recptr;
+
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
+
+ XLogBeginInsert();
+
+ /* We want the same filtering on this as on a plain insert */
+ XLogIncludeOrigin();
+
+ XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+
+ UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * heap_abort_speculative - kill a speculatively inserted tuple
+ *
+ * Marks a tuple that was speculatively inserted in the same command as dead,
+ * by setting its xmin as invalid. That makes it immediately appear as dead
+ * to all transactions, including our own. In particular, it makes
+ * HeapTupleSatisfiesDirty() regard the tuple as dead, so that another backend
+ * inserting a duplicate key value won't unnecessarily wait for our whole
+ * transaction to finish (it'll just wait for our speculative insertion to
+ * finish).
+ *
+ * Killing the tuple prevents "unprincipled deadlocks", which are deadlocks
+ * that arise due to a mutual dependency that is not user visible. By
+ * definition, unprincipled deadlocks cannot be prevented by the user
+ * reordering lock acquisition in client code, because the implementation level
+ * lock acquisitions are not under the user's direct control. If speculative
+ * inserters did not take this precaution, then under high concurrency they
+ * could deadlock with each other, which would not be acceptable.
+ *
+ * This is somewhat redundant with heap_delete, but we prefer to have a
+ * dedicated routine with stripped down requirements.
+ *
+ * This routine does not affect logical decoding as it only looks at
+ * confirmation records.
+ */
+void
+heap_abort_speculative(Relation relation, HeapTuple tuple)
+{
+ TransactionId xid = GetCurrentTransactionId();
+ ItemPointer tid = &(tuple->t_self);
+ ItemId lp;
+ HeapTupleData tp;
+ Page page;
+ BlockNumber block;
+ Buffer buffer;
+
+ Assert(ItemPointerIsValid(tid));
+
+ block = ItemPointerGetBlockNumber(tid);
+ buffer = ReadBuffer(relation, block);
+ page = BufferGetPage(buffer);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * Page can't be all visible, we just inserted into it, and are still
+ * running.
+ */
+ Assert(!PageIsAllVisible(page));
+
+ lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
+ Assert(ItemIdIsNormal(lp));
+
+ tp.t_tableOid = RelationGetRelid(relation);
+ tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+ tp.t_len = ItemIdGetLength(lp);
+ tp.t_self = *tid;
+
+ /*
+ * Sanity check that the tuple really is a speculatively inserted tuple,
+ * inserted by us.
+ */
+ if (tp.t_data->t_choice.t_heap.t_xmin != xid)
+ elog(ERROR, "attempted to kill a tuple inserted by another transaction");
+ if (!HeapTupleHeaderIsSpeculative(tp.t_data))
+ elog(ERROR, "attempted to kill a non-speculative tuple");
+ Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data));
+
+ /*
+ * No need to check for serializable conflicts here. There is never a
+ * need for a combocid, either. No need to extract replica identity, or
+ * do anything special with infomask bits.
+ */
+
+ START_CRIT_SECTION();
+
+ /*
+ * The tuple will become DEAD immediately. Flag that this page
+ * immediately is a candidate for pruning by setting xmin to
+ * RecentGlobalXmin. That's not pretty, but it doesn't seem worth
+ * inventing a nicer API for this.
+ */
+ Assert(TransactionIdIsValid(RecentGlobalXmin));
+ PageSetPrunable(page, RecentGlobalXmin);
+
+ /* store transaction information of xact deleting the tuple */
+ tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
+ tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+
+ /*
+ * Set the tuple header xmin to InvalidTransactionId. This makes the
+ * tuple immediately invisible everyone. (In particular, to any
+ * transactions waiting on the speculative token, woken up later.)
+ */
+ HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
+
+ /* Clear the speculative insertion token too */
+ tp.t_data->t_ctid = tp.t_self;
+
+ MarkBufferDirty(buffer);
+
+ /*
+ * XLOG stuff
+ *
+ * The WAL records generated here match heap_delete(). The same recovery
+ * routines are used.
+ */
+ if (RelationNeedsWAL(relation))
+ {
+ xl_heap_delete xlrec;
+ XLogRecPtr recptr;
+
+ xlrec.flags = XLH_DELETE_IS_SUPER;
+ xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
+ tp.t_data->t_infomask2);
+ xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
+ xlrec.xmax = xid;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+ /* No replica identity & replication origin logged */
+
+ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
+
+ PageSetLSN(page, recptr);
+ }
+
+ END_CRIT_SECTION();
+
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+ if (HeapTupleHasExternal(&tp))
+ toast_delete(relation, &tp);
+
+ /*
+ * Never need to mark tuple for invalidation, since catalogs don't support
+ * speculative insertion
+ */
+
+ /* Now we can release the buffer */
+ ReleaseBuffer(buffer);
+
+ /* count deletion, as we counted the insertion too */
+ pgstat_count_heap_delete(relation);
+}
/*
* heap_inplace_update - update a tuple "in place" (ie, overwrite it)
@@ -6732,22 +6989,22 @@ log_heap_update(Relation reln, Buffer oldbuf,
/* Prepare main WAL data chain */
xlrec.flags = 0;
if (all_visible_cleared)
- xlrec.flags |= XLOG_HEAP_ALL_VISIBLE_CLEARED;
+ xlrec.flags |= XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED;
if (new_all_visible_cleared)
- xlrec.flags |= XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED;
+ xlrec.flags |= XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED;
if (prefixlen > 0)
- xlrec.flags |= XLOG_HEAP_PREFIX_FROM_OLD;
+ xlrec.flags |= XLH_UPDATE_PREFIX_FROM_OLD;
if (suffixlen > 0)
- xlrec.flags |= XLOG_HEAP_SUFFIX_FROM_OLD;
+ xlrec.flags |= XLH_UPDATE_SUFFIX_FROM_OLD;
if (need_tuple_data)
{
- xlrec.flags |= XLOG_HEAP_CONTAINS_NEW_TUPLE;
+ xlrec.flags |= XLH_UPDATE_CONTAINS_NEW_TUPLE;
if (old_key_tuple)
{
if (reln->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_TUPLE;
+ xlrec.flags |= XLH_UPDATE_CONTAINS_OLD_TUPLE;
else
- xlrec.flags |= XLOG_HEAP_CONTAINS_OLD_KEY;
+ xlrec.flags |= XLH_UPDATE_CONTAINS_OLD_KEY;
}
}
@@ -7378,7 +7635,7 @@ heap_xlog_delete(XLogReaderState *record)
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(target_node);
Buffer vmbuffer = InvalidBuffer;
@@ -7406,13 +7663,16 @@ heap_xlog_delete(XLogReaderState *record)
HeapTupleHeaderClearHotUpdated(htup);
fix_infomask_from_infobits(xlrec->infobits_set,
&htup->t_infomask, &htup->t_infomask2);
- HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ if (!(xlrec->flags & XLH_DELETE_IS_SUPER))
+ HeapTupleHeaderSetXmax(htup, xlrec->xmax);
+ else
+ HeapTupleHeaderSetXmin(htup, InvalidTransactionId);
HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, XLogRecGetXid(record));
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
/* Make sure there is no forward chain link in t_ctid */
@@ -7453,7 +7713,7 @@ heap_xlog_insert(XLogReaderState *record)
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(target_node);
Buffer vmbuffer = InvalidBuffer;
@@ -7516,7 +7776,7 @@ heap_xlog_insert(XLogReaderState *record)
PageSetLSN(page, lsn);
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
MarkBufferDirty(buffer);
@@ -7573,7 +7833,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer;
@@ -7655,7 +7915,7 @@ heap_xlog_multi_insert(XLogReaderState *record)
PageSetLSN(page, lsn);
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
MarkBufferDirty(buffer);
@@ -7728,7 +7988,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer;
@@ -7783,7 +8043,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
/* Mark the page as a candidate for pruning */
PageSetPrunable(page, XLogRecGetXid(record));
- if (xlrec->flags & XLOG_HEAP_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
PageSetLSN(page, lsn);
@@ -7812,7 +8072,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
*/
- if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
{
Relation reln = CreateFakeRelcacheEntry(rnode);
Buffer vmbuffer = InvalidBuffer;
@@ -7840,13 +8100,13 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
elog(PANIC, "heap_update_redo: invalid max offset number");
- if (xlrec->flags & XLOG_HEAP_PREFIX_FROM_OLD)
+ if (xlrec->flags & XLH_UPDATE_PREFIX_FROM_OLD)
{
Assert(newblk == oldblk);
memcpy(&prefixlen, recdata, sizeof(uint16));
recdata += sizeof(uint16);
}
- if (xlrec->flags & XLOG_HEAP_SUFFIX_FROM_OLD)
+ if (xlrec->flags & XLH_UPDATE_SUFFIX_FROM_OLD)
{
Assert(newblk == oldblk);
memcpy(&suffixlen, recdata, sizeof(uint16));
@@ -7918,7 +8178,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_update_redo: failed to add tuple");
- if (xlrec->flags & XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED)
+ if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
@@ -7951,6 +8211,42 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
XLogRecordPageWithFreeSpace(rnode, newblk, freespace);
}
+static void
+heap_xlog_confirm(XLogReaderState *record)
+{
+ XLogRecPtr lsn = record->EndRecPtr;
+ xl_heap_confirm *xlrec = (xl_heap_confirm *) XLogRecGetData(record);
+ Buffer buffer;
+ Page page;
+ OffsetNumber offnum;
+ ItemId lp = NULL;
+ HeapTupleHeader htup;
+
+ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+ {
+ page = BufferGetPage(buffer);
+
+ offnum = xlrec->offnum;
+ if (PageGetMaxOffsetNumber(page) >= offnum)
+ lp = PageGetItemId(page, offnum);
+
+ if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
+ elog(PANIC, "heap_confirm_redo: invalid lp");
+
+ htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+ /*
+ * Confirm tuple as actually inserted
+ */
+ ItemPointerSet(&htup->t_ctid, BufferGetBlockNumber(buffer), offnum);
+
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ }
+ if (BufferIsValid(buffer))
+ UnlockReleaseBuffer(buffer);
+}
+
static void
heap_xlog_lock(XLogReaderState *record)
{
@@ -8101,6 +8397,9 @@ heap_redo(XLogReaderState *record)
case XLOG_HEAP_HOT_UPDATE:
heap_xlog_update(record, true);
break;
+ case XLOG_HEAP_CONFIRM:
+ heap_xlog_confirm(record);
+ break;
case XLOG_HEAP_LOCK:
heap_xlog_lock(record);
break;
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 6d091f63af..a9f0ca35e4 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -35,12 +35,17 @@
void
RelationPutHeapTuple(Relation relation,
Buffer buffer,
- HeapTuple tuple)
+ HeapTuple tuple,
+ bool token)
{
Page pageHeader;
OffsetNumber offnum;
- ItemId itemId;
- Item item;
+
+ /*
+ * A tuple that's being inserted speculatively should already have its
+ * token set.
+ */
+ Assert(!token || HeapTupleHeaderIsSpeculative(tuple->t_data));
/* Add the tuple to the page */
pageHeader = BufferGetPage(buffer);
@@ -54,10 +59,18 @@ RelationPutHeapTuple(Relation relation,
/* Update tuple->t_self to the actual position where it was stored */
ItemPointerSet(&(tuple->t_self), BufferGetBlockNumber(buffer), offnum);
- /* Insert the correct position into CTID of the stored tuple, too */
- itemId = PageGetItemId(pageHeader, offnum);
- item = PageGetItem(pageHeader, itemId);
- ((HeapTupleHeader) item)->t_ctid = tuple->t_self;
+ /*
+ * Insert the correct position into CTID of the stored tuple, too
+ * (unless this is a speculative insertion, in which case the token is
+ * held in CTID field instead)
+ */
+ if (!token)
+ {
+ ItemId itemId = PageGetItemId(pageHeader, offnum);
+ Item item = PageGetItem(pageHeader, itemId);
+
+ ((HeapTupleHeader) item)->t_ctid = tuple->t_self;
+ }
}
/*
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 8464e8794f..274155ad0c 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -522,6 +522,14 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
bool toast_free[MaxHeapAttributeNumber];
bool toast_delold[MaxHeapAttributeNumber];
+ /*
+ * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
+ * deletions just normally insert/delete the toast values. It seems
+ * easiest to deal with that here, instead on, potentially, multiple
+ * callers.
+ */
+ options &= ~HEAP_INSERT_SPECULATIVE;
+
/*
* We should only ever be called for tuples of plain relations or
* materialized views --- recursing on a toast rel is bad news.
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index ef68a7145f..4a60c5fa2c 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -51,7 +51,8 @@ static Buffer _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf);
static TransactionId _bt_check_unique(Relation rel, IndexTuple itup,
Relation heapRel, Buffer buf, OffsetNumber offset,
ScanKey itup_scankey,
- IndexUniqueCheck checkUnique, bool *is_unique);
+ IndexUniqueCheck checkUnique, bool *is_unique,
+ uint32 *speculativeToken);
static void _bt_findinsertloc(Relation rel,
Buffer *bufptr,
OffsetNumber *offsetptr,
@@ -159,17 +160,27 @@ top:
*/
if (checkUnique != UNIQUE_CHECK_NO)
{
- TransactionId xwait;
+ TransactionId xwait;
+ uint32 speculativeToken;
offset = _bt_binsrch(rel, buf, natts, itup_scankey, false);
xwait = _bt_check_unique(rel, itup, heapRel, buf, offset, itup_scankey,
- checkUnique, &is_unique);
+ checkUnique, &is_unique, &speculativeToken);
if (TransactionIdIsValid(xwait))
{
/* Have to wait for the other guy ... */
_bt_relbuf(rel, buf);
- XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
+ /*
+ * If it's a speculative insertion, wait for it to finish (ie.
+ * to go ahead with the insertion, or kill the tuple). Otherwise
+ * wait for the transaction to finish as usual.
+ */
+ if (speculativeToken)
+ SpeculativeInsertionWait(xwait, speculativeToken);
+ else
+ XactLockTableWait(xwait, rel, &itup->t_tid, XLTW_InsertIndex);
+
/* start over... */
_bt_freestack(stack);
goto top;
@@ -213,7 +224,10 @@ top:
*
* Returns InvalidTransactionId if there is no conflict, else an xact ID
* we must wait for to see if it commits a conflicting tuple. If an actual
- * conflict is detected, no return --- just ereport().
+ * conflict is detected, no return --- just ereport(). If an xact ID is
+ * returned, and the conflicting tuple still has a speculative insertion in
+ * progress, *speculativeToken is set to non-zero, and the caller can wait for
+ * the verdict on the insertion using SpeculativeInsertionWait().
*
* However, if checkUnique == UNIQUE_CHECK_PARTIAL, we always return
* InvalidTransactionId because we don't want to wait. In this case we
@@ -223,7 +237,8 @@ top:
static TransactionId
_bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
Buffer buf, OffsetNumber offset, ScanKey itup_scankey,
- IndexUniqueCheck checkUnique, bool *is_unique)
+ IndexUniqueCheck checkUnique, bool *is_unique,
+ uint32 *speculativeToken)
{
TupleDesc itupdesc = RelationGetDescr(rel);
int natts = rel->rd_rel->relnatts;
@@ -340,6 +355,7 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel,
if (nbuf != InvalidBuffer)
_bt_relbuf(rel, nbuf);
/* Tell _bt_doinsert to wait... */
+ *speculativeToken = SnapshotDirty.speculativeToken;
return xwait;
}
diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c
index 4f06a2637a..f4a1b002cf 100644
--- a/src/backend/access/rmgrdesc/heapdesc.c
+++ b/src/backend/access/rmgrdesc/heapdesc.c
@@ -75,6 +75,12 @@ heap_desc(StringInfo buf, XLogReaderState *record)
xlrec->new_offnum,
xlrec->new_xmax);
}
+ else if (info == XLOG_HEAP_CONFIRM)
+ {
+ xl_heap_confirm *xlrec = (xl_heap_confirm *) rec;
+
+ appendStringInfo(buf, "off %u", xlrec->offnum);
+ }
else if (info == XLOG_HEAP_LOCK)
{
xl_heap_lock *xlrec = (xl_heap_lock *) rec;
@@ -177,6 +183,9 @@ heap_identify(uint8 info)
case XLOG_HEAP_HOT_UPDATE | XLOG_HEAP_INIT_PAGE:
id = "HOT_UPDATE+INIT";
break;
+ case XLOG_HEAP_CONFIRM:
+ id = "HEAP_CONFIRM";
+ break;
case XLOG_HEAP_LOCK:
id = "LOCK";
break;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index ac3b785b5a..8c8a9eafee 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1665,6 +1665,10 @@ BuildIndexInfo(Relation index)
/* other info */
ii->ii_Unique = indexStruct->indisunique;
ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
+ /* assume not doing speculative insertion for now */
+ ii->ii_UniqueOps = NULL;
+ ii->ii_UniqueProcs = NULL;
+ ii->ii_UniqueStrats = NULL;
/* initialize index-build state to default */
ii->ii_Concurrent = false;
@@ -1673,6 +1677,53 @@ BuildIndexInfo(Relation index)
return ii;
}
+/* ----------------
+ * BuildSpeculativeIndexInfo
+ * Add extra state to IndexInfo record
+ *
+ * For unique indexes, we usually don't want to add info to the IndexInfo for
+ * checking uniqueness, since the B-Tree AM handles that directly. However,
+ * in the case of speculative insertion, additional support is required.
+ *
+ * Do this processing here rather than in BuildIndexInfo() to not incur the
+ * overhead in the common non-speculative cases.
+ * ----------------
+ */
+void
+BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
+{
+ int ncols = index->rd_rel->relnatts;
+ int i;
+
+ /*
+ * fetch info for checking unique indexes
+ */
+ Assert(ii->ii_Unique);
+
+ if (index->rd_rel->relam != BTREE_AM_OID)
+ elog(ERROR, "unexpected non-btree speculative unique index");
+
+ ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
+ ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
+ ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
+
+ /*
+ * We have to look up the operator's strategy number. This
+ * provides a cross-check that the operator does match the index.
+ */
+ /* We need the func OIDs and strategy numbers too */
+ for (i = 0; i < ncols; i++)
+ {
+ ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
+ ii->ii_UniqueOps[i] =
+ get_opfamily_member(index->rd_opfamily[i],
+ index->rd_opcintype[i],
+ index->rd_opcintype[i],
+ ii->ii_UniqueStrats[i]);
+ ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
+ }
+}
+
/* ----------------
* FormIndexDatum
* Construct values[] and isnull[] arrays for a new index tuple.
@@ -2612,7 +2663,7 @@ IndexCheckExclusion(Relation heapRelation,
check_exclusion_constraint(heapRelation,
indexRelation, indexInfo,
&(heapTuple->t_self), values, isnull,
- estate, true, false);
+ estate, true);
}
heap_endscan(scan);
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index fe123addac..0231084c7c 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -46,7 +46,7 @@ CatalogOpenIndexes(Relation heapRel)
resultRelInfo->ri_RelationDesc = heapRel;
resultRelInfo->ri_TrigDesc = NULL; /* we don't fire triggers */
- ExecOpenIndices(resultRelInfo);
+ ExecOpenIndices(resultRelInfo, false);
return resultRelInfo;
}
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index 332926424b..cc0f8c45a6 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -229,7 +229,7 @@ F311 Schema definition statement 02 CREATE TABLE for persistent base tables YES
F311 Schema definition statement 03 CREATE VIEW YES
F311 Schema definition statement 04 CREATE VIEW: WITH CHECK OPTION YES
F311 Schema definition statement 05 GRANT statement YES
-F312 MERGE statement NO
+F312 MERGE statement NO Consider INSERT ... ON CONFLICT DO UPDATE
F313 Enhanced MERGE statement NO
F314 MERGE statement with DELETE branch NO
F321 User authorization YES
diff --git a/src/backend/commands/constraint.c b/src/backend/commands/constraint.c
index 561d8fae57..e49affba9e 100644
--- a/src/backend/commands/constraint.c
+++ b/src/backend/commands/constraint.c
@@ -172,7 +172,7 @@ unique_key_recheck(PG_FUNCTION_ARGS)
*/
check_exclusion_constraint(trigdata->tg_relation, indexRel, indexInfo,
&(new_row->t_self), values, isnull,
- estate, false, false);
+ estate, false);
}
/*
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index aa8ae4b9bc..00a2417a09 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2284,7 +2284,7 @@ CopyFrom(CopyState cstate)
1, /* dummy rangetable index */
0);
- ExecOpenIndices(resultRelInfo);
+ ExecOpenIndices(resultRelInfo, false);
estate->es_result_relations = resultRelInfo;
estate->es_num_result_relations = 1;
@@ -2439,7 +2439,8 @@ CopyFrom(CopyState cstate)
if (resultRelInfo->ri_NumIndices > 0)
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate);
+ estate, false, NULL,
+ NIL);
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, tuple,
@@ -2553,7 +2554,7 @@ CopyFromInsertBatch(CopyState cstate, EState *estate, CommandId mycid,
ExecStoreTuple(bufferedTuples[i], myslot, InvalidBuffer, false);
recheckIndexes =
ExecInsertIndexTuples(myslot, &(bufferedTuples[i]->t_self),
- estate);
+ estate, false, NULL, NIL);
ExecARInsertTriggers(estate, resultRelInfo,
bufferedTuples[i],
recheckIndexes);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index f4cc90183a..c5452e3cb6 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -103,7 +103,8 @@ static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir,
static void ExplainScanTarget(Scan *plan, ExplainState *es);
static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es);
static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es);
-static void show_modifytable_info(ModifyTableState *mtstate, ExplainState *es);
+static void show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
+ ExplainState *es);
static void ExplainMemberNodes(List *plans, PlanState **planstates,
List *ancestors, ExplainState *es);
static void ExplainSubPlans(List *plans, List *ancestors,
@@ -744,6 +745,9 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_ModifyTable:
*rels_used = bms_add_member(*rels_used,
((ModifyTable *) plan)->nominalRelation);
+ if (((ModifyTable *) plan)->exclRelRTI)
+ *rels_used = bms_add_member(*rels_used,
+ ((ModifyTable *) plan)->exclRelRTI);
break;
default:
break;
@@ -1466,7 +1470,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
planstate, es);
break;
case T_ModifyTable:
- show_modifytable_info((ModifyTableState *) planstate, es);
+ show_modifytable_info((ModifyTableState *) planstate, ancestors,
+ es);
break;
case T_Hash:
show_hash_info((HashState *) planstate, es);
@@ -2317,18 +2322,22 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
/*
* Show extra information for a ModifyTable node
*
- * We have two objectives here. First, if there's more than one target table
- * or it's different from the nominal target, identify the actual target(s).
- * Second, give FDWs a chance to display extra info about foreign targets.
+ * We have three objectives here. First, if there's more than one target
+ * table or it's different from the nominal target, identify the actual
+ * target(s). Second, give FDWs a chance to display extra info about foreign
+ * targets. Third, show information about ON CONFLICT.
*/
static void
-show_modifytable_info(ModifyTableState *mtstate, ExplainState *es)
+show_modifytable_info(ModifyTableState *mtstate, List *ancestors,
+ ExplainState *es)
{
ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
const char *operation;
const char *foperation;
bool labeltargets;
int j;
+ List *idxNames = NIL;
+ ListCell *lst;
switch (node->operation)
{
@@ -2414,6 +2423,55 @@ show_modifytable_info(ModifyTableState *mtstate, ExplainState *es)
}
}
+ /* Gather names of ON CONFLICT arbiter indexes */
+ foreach(lst, node->arbiterIndexes)
+ {
+ char *indexname = get_rel_name(lfirst_oid(lst));
+
+ idxNames = lappend(idxNames, indexname);
+ }
+
+ if (node->onConflictAction != ONCONFLICT_NONE)
+ {
+ ExplainProperty("Conflict Resolution",
+ node->onConflictAction == ONCONFLICT_NOTHING ?
+ "NOTHING" : "UPDATE",
+ false, es);
+
+ /*
+ * Don't display arbiter indexes at all when DO NOTHING variant
+ * implicitly ignores all conflicts
+ */
+ if (idxNames)
+ ExplainPropertyList("Conflict Arbiter Indexes", idxNames, es);
+
+ /* ON CONFLICT DO UPDATE WHERE qual is specially displayed */
+ if (node->onConflictWhere)
+ {
+ show_upper_qual((List *) node->onConflictWhere, "Conflict Filter",
+ &mtstate->ps, ancestors, es);
+ show_instrumentation_count("Rows Removed by Conflict Filter", 1, &mtstate->ps, es);
+ }
+
+ /* EXPLAIN ANALYZE display of actual outcome for each tuple proposed */
+ if (es->analyze && mtstate->ps.instrument)
+ {
+ double total;
+ double insert_path;
+ double other_path;
+
+ InstrEndLoop(mtstate->mt_plans[0]->instrument);
+
+ /* count the number of source rows */
+ total = mtstate->mt_plans[0]->instrument->ntuples;
+ other_path = mtstate->ps.instrument->nfiltered2;
+ insert_path = total - other_path;
+
+ ExplainPropertyFloat("Tuples Inserted", insert_path, 0, es);
+ ExplainPropertyFloat("Conflicting Tuples", other_path, 0, es);
+ }
+ }
+
if (labeltargets)
ExplainCloseGroup("Target Tables", "Target Tables", false, es);
}
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 222e7fce85..b537ca5e66 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2421,21 +2421,10 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
TupleTableSlot *newSlot;
int i;
Bitmapset *updatedCols;
- Bitmapset *keyCols;
LockTupleMode lockmode;
- /*
- * Compute lock mode to use. If columns that are part of the key have not
- * been modified, then we can use a weaker lock, allowing for better
- * concurrency.
- */
- updatedCols = GetUpdatedColumns(relinfo, estate);
- keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
- INDEX_ATTR_BITMAP_KEY);
- if (bms_overlap(keyCols, updatedCols))
- lockmode = LockTupleExclusive;
- else
- lockmode = LockTupleNoKeyExclusive;
+ /* Determine lock mode to use */
+ lockmode = ExecUpdateLockMode(estate, relinfo);
Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid));
if (fdw_trigtuple == NULL)
@@ -2476,6 +2465,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
TRIGGER_EVENT_ROW |
TRIGGER_EVENT_BEFORE;
LocTriggerData.tg_relation = relinfo->ri_RelationDesc;
+ updatedCols = GetUpdatedColumns(relinfo, estate);
for (i = 0; i < trigdesc->numtriggers; i++)
{
Trigger *trigger = &trigdesc->triggers[i];
@@ -2783,6 +2773,9 @@ ltrmark:;
*/
return NULL;
+ case HeapTupleInvisible:
+ elog(ERROR, "attempted to lock invisible tuple");
+
default:
ReleaseBuffer(buffer);
elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index a697682b20..e7cf72b387 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -50,6 +50,50 @@
* to the caller. The caller must re-check them later by calling
* check_exclusion_constraint().
*
+ * Speculative insertion
+ * ---------------------
+ *
+ * Speculative insertion is a is a two-phase mechanism, used to implement
+ * INSERT ... ON CONFLICT DO UPDATE/NOTHING. The tuple is first inserted
+ * to the heap and update the indexes as usual, but if a constraint is
+ * violated, we can still back out the insertion without aborting the whole
+ * transaction. In an INSERT ... ON CONFLICT statement, if a conflict is
+ * detected, the inserted tuple is backed out and the ON CONFLICT action is
+ * executed instead.
+ *
+ * Insertion to a unique index works as usual: the index AM checks for
+ * duplicate keys atomically with the insertion. But instead of throwing
+ * an error on a conflict, the speculatively inserted heap tuple is backed
+ * out.
+ *
+ * Exclusion constraints are slightly more complicated. As mentioned
+ * earlier, there is a risk of deadlock when two backends insert the same
+ * key concurrently. That was not a problem for regular insertions, when
+ * one of the transactions has to be aborted anyway, but with a speculative
+ * insertion we cannot let a deadlock happen, because we only want to back
+ * out the speculatively inserted tuple on conflict, not abort the whole
+ * transaction.
+ *
+ * When a backend detects that the speculative insertion conflicts with
+ * another in-progress tuple, it has two options:
+ *
+ * 1. back out the speculatively inserted tuple, then wait for the other
+ * transaction, and retry. Or,
+ * 2. wait for the other transaction, with the speculatively inserted tuple
+ * still in place.
+ *
+ * If two backends insert at the same time, and both try to wait for each
+ * other, they will deadlock. So option 2 is not acceptable. Option 1
+ * avoids the deadlock, but it is prone to a livelock instead. Both
+ * transactions will wake up immediately as the other transaction backs
+ * out. Then they both retry, and conflict with each other again, lather,
+ * rinse, repeat.
+ *
+ * To avoid the livelock, one of the backends must back out first, and then
+ * wait, while the other one waits without backing out. It doesn't matter
+ * which one backs out, so we employ an arbitrary rule that the transaction
+ * with the higher XID backs out.
+ *
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -63,12 +107,30 @@
#include "postgres.h"
#include "access/relscan.h"
+#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
#include "utils/tqual.h"
+/* waitMode argument to check_exclusion_or_unique_constraint() */
+typedef enum
+{
+ CEOUC_WAIT,
+ CEOUC_NOWAIT,
+ CEOUC_LIVELOCK_PREVENTING_WAIT,
+} CEOUC_WAIT_MODE;
+
+static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ CEOUC_WAIT_MODE waitMode,
+ bool errorOK,
+ ItemPointer conflictTid);
+
static bool index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Datum *new_values);
@@ -84,7 +146,7 @@ static bool index_recheck_constraint(Relation index, Oid *constr_procs,
* ----------------------------------------------------------------
*/
void
-ExecOpenIndices(ResultRelInfo *resultRelInfo)
+ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
{
Relation resultRelation = resultRelInfo->ri_RelationDesc;
List *indexoidlist;
@@ -137,6 +199,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo)
/* extract index key information from the index's pg_index info */
ii = BuildIndexInfo(indexDesc);
+ /*
+ * If the indexes are to be used for speculative insertion, add extra
+ * information required by unique index entries.
+ */
+ if (speculative && ii->ii_Unique)
+ BuildSpeculativeIndexInfo(indexDesc, ii);
+
relationDescs[i] = indexDesc;
indexInfoArray[i] = ii;
i++;
@@ -186,7 +255,9 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* Unique and exclusion constraints are enforced at the same
* time. This returns a list of index OIDs for any unique or
* exclusion constraints that are deferred and that had
- * potential (unconfirmed) conflicts.
+ * potential (unconfirmed) conflicts. (if noDupErr == true,
+ * the same is done for non-deferred constraints, but report
+ * if conflict was speculative or deferred conflict to caller)
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
@@ -196,7 +267,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
List *
ExecInsertIndexTuples(TupleTableSlot *slot,
ItemPointer tupleid,
- EState *estate)
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict,
+ List *arbiterIndexes)
{
List *result = NIL;
ResultRelInfo *resultRelInfo;
@@ -236,12 +310,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
IndexInfo *indexInfo;
IndexUniqueCheck checkUnique;
bool satisfiesConstraint;
+ bool arbiter;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
+ /* Record if speculative insertion arbiter */
+ arbiter = list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid);
+
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
@@ -288,9 +367,14 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
* For a deferrable unique index, we tell the index AM to just detect
* possible non-uniqueness, and we add the index OID to the result
* list if further checking is needed.
+ *
+ * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+ * the same as for a deferrable unique index.
*/
if (!indexRelation->rd_index->indisunique)
checkUnique = UNIQUE_CHECK_NO;
+ else if (noDupErr && (arbiterIndexes == NIL || arbiter))
+ checkUnique = UNIQUE_CHECK_PARTIAL;
else if (indexRelation->rd_index->indimmediate)
checkUnique = UNIQUE_CHECK_YES;
else
@@ -308,8 +392,11 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
* If the index has an associated exclusion constraint, check that.
* This is simpler than the process for uniqueness checks since we
* always insert first and then check. If the constraint is deferred,
- * we check now anyway, but don't throw error on violation; instead
- * we'll queue a recheck event.
+ * we check now anyway, but don't throw error on violation or wait for
+ * a conclusive outcome from a concurrent insertion; instead we'll
+ * queue a recheck event. Similarly, noDupErr callers (speculative
+ * inserters) will recheck later, and wait for a conclusive outcome
+ * then.
*
* An index for an exclusion constraint can't also be UNIQUE (not an
* essential property, we just don't allow it in the grammar), so no
@@ -317,13 +404,31 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
*/
if (indexInfo->ii_ExclusionOps != NULL)
{
- bool errorOK = !indexRelation->rd_index->indimmediate;
+ bool violationOK;
+ bool waitMode;
+
+ if (noDupErr)
+ {
+ violationOK = true;
+ waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+ }
+ else if (!indexRelation->rd_index->indimmediate)
+ {
+ violationOK = true;
+ waitMode = CEOUC_NOWAIT;
+ }
+ else
+ {
+ violationOK = false;
+ waitMode = CEOUC_WAIT;
+ }
satisfiesConstraint =
- check_exclusion_constraint(heapRelation,
- indexRelation, indexInfo,
- tupleid, values, isnull,
- estate, false, errorOK);
+ check_exclusion_or_unique_constraint(heapRelation,
+ indexRelation, indexInfo,
+ tupleid, values, isnull,
+ estate, false,
+ waitMode, violationOK, NULL);
}
if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
@@ -333,46 +438,213 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
/*
* The tuple potentially violates the uniqueness or exclusion
* constraint, so make a note of the index so that we can re-check
- * it later.
+ * it later. Speculative inserters are told if there was a
+ * speculative conflict, since that always requires a restart.
*/
result = lappend_oid(result, RelationGetRelid(indexRelation));
+ if (indexRelation->rd_index->indimmediate && specConflict)
+ *specConflict = true;
}
}
return result;
}
+/* ----------------------------------------------------------------
+ * ExecCheckIndexConstraints
+ *
+ * This routine checks if a tuple violates any unique or
+ * exclusion constraints. Returns true if there is no no conflict.
+ * Otherwise returns false, and the TID of the conflicting
+ * tuple is returned in *conflictTid.
+ *
+ * If 'arbiterIndexes' is given, only those indexes are checked.
+ * NIL means all indexes.
+ *
+ * Note that this doesn't lock the values in any way, so it's
+ * possible that a conflicting tuple is inserted immediately
+ * after this returns. But this can be used for a pre-check
+ * before insertion.
+ * ----------------------------------------------------------------
+ */
+bool
+ExecCheckIndexConstraints(TupleTableSlot *slot,
+ EState *estate, ItemPointer conflictTid,
+ List *arbiterIndexes)
+{
+ ResultRelInfo *resultRelInfo;
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ItemPointerData invalidItemPtr;
+ bool checkedIndex = false;
+
+ ItemPointerSetInvalid(conflictTid);
+ ItemPointerSetInvalid(&invalidItemPtr);
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ resultRelInfo = estate->es_result_relation_info;
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /*
+ * For each index, form index tuple and check if it satisfies the
+ * constraint.
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+ bool satisfiesConstraint;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
+ continue;
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ /* When specific arbiter indexes requested, only examine them */
+ if (arbiterIndexes != NIL &&
+ !list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid))
+ continue;
+
+ if (!indexRelation->rd_index->indimmediate)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT does not support deferred unique constraints/exclusion constraints as arbiters"),
+ errtableconstraint(heapRelation,
+ RelationGetRelationName(indexRelation))));
+
+ checkedIndex = true;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ List *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NIL)
+ {
+ predicate = (List *)
+ ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
+ estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext, false))
+ continue;
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ satisfiesConstraint =
+ check_exclusion_or_unique_constraint(heapRelation, indexRelation,
+ indexInfo, &invalidItemPtr,
+ values, isnull, estate, false,
+ CEOUC_WAIT, true,
+ conflictTid);
+ if (!satisfiesConstraint)
+ return false;
+ }
+
+ if (arbiterIndexes != NIL && !checkedIndex)
+ elog(ERROR, "unexpected failure to find arbiter index");
+
+ return true;
+}
+
/*
- * Check for violation of an exclusion constraint
+ * Check for violation of an exclusion or unique constraint
*
* heap: the table containing the new tuple
- * index: the index supporting the exclusion constraint
+ * index: the index supporting the constraint
* indexInfo: info about the index, including the exclusion properties
- * tupleid: heap TID of the new tuple we have just inserted
+ * tupleid: heap TID of the new tuple we have just inserted (invalid if we
+ * haven't inserted a new tuple yet)
* values, isnull: the *index* column values computed for the new tuple
* estate: an EState we can do evaluation in
* newIndex: if true, we are trying to build a new index (this affects
* only the wording of error messages)
- * errorOK: if true, don't throw error for violation
+ * waitMode: whether to wait for concurrent inserters/deleters
+ * violationOK: if true, don't throw error for violation
+ * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here
*
* Returns true if OK, false if actual or potential violation
*
- * When errorOK is true, we report violation without waiting to see if any
- * concurrent transaction has committed or not; so the violation is only
- * potential, and the caller must recheck sometime later. This behavior
- * is convenient for deferred exclusion checks; we need not bother queuing
- * a deferred event if there is definitely no conflict at insertion time.
+ * 'waitMode' determines what happens if a conflict is detected with a tuple
+ * that was inserted or deleted by a transaction that's still running.
+ * CEOUC_WAIT means that we wait for the transaction to commit, before
+ * throwing an error or returning. CEOUC_NOWAIT means that we report the
+ * violation immediately; so the violation is only potential, and the caller
+ * must recheck sometime later. This behavior is convenient for deferred
+ * exclusion checks; we need not bother queuing a deferred event if there is
+ * definitely no conflict at insertion time.
+ *
+ * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes
+ * wait anyway, to prevent livelocking if two transactions try inserting at
+ * the same time. This is used with speculative insertions, for INSERT ON
+ * CONFLICT statements. (See notes in file header)
*
- * When errorOK is false, we'll throw error on violation, so a false result
- * is impossible.
+ * If violationOK is true, we just report the potential or actual violation to
+ * the caller by returning 'false'. Otherwise we throw a descriptive error
+ * message here. When violationOK is false, a false result is impossible.
+ *
+ * Note: The indexam is normally responsible for checking unique constraints,
+ * so this normally only needs to be used for exclusion constraints. But this
+ * function is also called when doing a "pre-check" for conflicts on a unique
+ * constraint, when doing speculative insertion. Caller may use the returned
+ * conflict TID to take further steps.
*/
-bool
-check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
- ItemPointer tupleid, Datum *values, bool *isnull,
- EState *estate, bool newIndex, bool errorOK)
+static bool
+check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ CEOUC_WAIT_MODE waitMode,
+ bool violationOK,
+ ItemPointer conflictTid)
{
- Oid *constr_procs = indexInfo->ii_ExclusionProcs;
- uint16 *constr_strats = indexInfo->ii_ExclusionStrats;
+ Oid *constr_procs;
+ uint16 *constr_strats;
Oid *index_collations = index->rd_indcollation;
int index_natts = index->rd_index->indnatts;
IndexScanDesc index_scan;
@@ -386,6 +658,17 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
TupleTableSlot *existing_slot;
TupleTableSlot *save_scantuple;
+ if (indexInfo->ii_ExclusionOps)
+ {
+ constr_procs = indexInfo->ii_ExclusionProcs;
+ constr_strats = indexInfo->ii_ExclusionStrats;
+ }
+ else
+ {
+ constr_procs = indexInfo->ii_UniqueProcs;
+ constr_strats = indexInfo->ii_UniqueStrats;
+ }
+
/*
* If any of the input values are NULL, the constraint check is assumed to
* pass (i.e., we assume the operators are strict).
@@ -450,7 +733,8 @@ retry:
/*
* Ignore the entry for the tuple we're trying to check.
*/
- if (ItemPointerEquals(tupleid, &tup->t_self))
+ if (ItemPointerIsValid(tupleid) &&
+ ItemPointerEquals(tupleid, &tup->t_self))
{
if (found_self) /* should not happen */
elog(ERROR, "found self tuple multiple times in index \"%s\"",
@@ -480,39 +764,47 @@ retry:
}
/*
- * At this point we have either a conflict or a potential conflict. If
- * we're not supposed to raise error, just return the fact of the
- * potential conflict without waiting to see if it's real.
- */
- if (errorOK)
- {
- conflict = true;
- break;
- }
-
- /*
+ * At this point we have either a conflict or a potential conflict.
+ *
* If an in-progress transaction is affecting the visibility of this
- * tuple, we need to wait for it to complete and then recheck. For
- * simplicity we do rechecking by just restarting the whole scan ---
- * this case probably doesn't happen often enough to be worth trying
- * harder, and anyway we don't want to hold any index internal locks
- * while waiting.
+ * tuple, we need to wait for it to complete and then recheck (unless
+ * the caller requested not to). For simplicity we do rechecking by
+ * just restarting the whole scan --- this case probably doesn't
+ * happen often enough to be worth trying harder, and anyway we don't
+ * want to hold any index internal locks while waiting.
*/
xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
DirtySnapshot.xmin : DirtySnapshot.xmax;
- if (TransactionIdIsValid(xwait))
+ if (TransactionIdIsValid(xwait) &&
+ (waitMode == CEOUC_WAIT ||
+ (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
+ DirtySnapshot.speculativeToken &&
+ TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
{
ctid_wait = tup->t_data->t_ctid;
index_endscan(index_scan);
- XactLockTableWait(xwait, heap, &ctid_wait,
- XLTW_RecheckExclusionConstr);
+ if (DirtySnapshot.speculativeToken)
+ SpeculativeInsertionWait(DirtySnapshot.xmin,
+ DirtySnapshot.speculativeToken);
+ else
+ XactLockTableWait(xwait, heap, &ctid_wait,
+ XLTW_RecheckExclusionConstr);
goto retry;
}
/*
- * We have a definite conflict. Report it.
+ * We have a definite conflict (or a potential one, but the caller
+ * didn't want to wait). Return it to caller, or report it.
*/
+ if (violationOK)
+ {
+ conflict = true;
+ if (conflictTid)
+ *conflictTid = tup->t_self;
+ break;
+ }
+
error_new = BuildIndexValueDescription(index, values, isnull);
error_existing = BuildIndexValueDescription(index, existing_values,
existing_isnull);
@@ -544,10 +836,10 @@ retry:
/*
* Ordinarily, at this point the search should have found the originally
- * inserted tuple, unless we exited the loop early because of conflict.
- * However, it is possible to define exclusion constraints for which that
- * wouldn't be true --- for instance, if the operator is <>. So we no
- * longer complain if found_self is still false.
+ * inserted tuple (if any), unless we exited the loop early because of
+ * conflict. However, it is possible to define exclusion constraints for
+ * which that wouldn't be true --- for instance, if the operator is <>.
+ * So we no longer complain if found_self is still false.
*/
econtext->ecxt_scantuple = save_scantuple;
@@ -557,6 +849,25 @@ retry:
return !conflict;
}
+/*
+ * Check for violation of an exclusion constraint
+ *
+ * This is a dumbed down version of check_exclusion_or_unique_constraint
+ * for external callers. They don't need all the special modes.
+ */
+void
+check_exclusion_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex)
+{
+ (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid,
+ values, isnull,
+ estate, newIndex,
+ CEOUC_WAIT, false, NULL);
+}
+
/*
* Check existing tuple's index values to see if it really matches the
* exclusion condition against the new_values. Returns true if conflict.
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 4272d9bc15..0dee949178 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1813,6 +1813,12 @@ ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
errmsg("new row violates row level security policy for \"%s\"",
wco->relname)));
break;
+ case WCO_RLS_CONFLICT_CHECK:
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("new row violates row level security policy (USING expression) for \"%s\"",
+ wco->relname)));
+ break;
default:
elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
break;
@@ -1972,6 +1978,31 @@ ExecBuildSlotValueDescription(Oid reloid,
}
+/*
+ * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
+ * given ResultRelInfo
+ */
+LockTupleMode
+ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
+{
+ Bitmapset *keyCols;
+ Bitmapset *updatedCols;
+
+ /*
+ * Compute lock mode to use. If columns that are part of the key have not
+ * been modified, then we can use a weaker lock, allowing for better
+ * concurrency.
+ */
+ updatedCols = GetUpdatedColumns(relinfo, estate);
+ keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
+ INDEX_ATTR_BITMAP_KEY);
+
+ if (bms_overlap(keyCols, updatedCols))
+ return LockTupleExclusive;
+
+ return LockTupleNoKeyExclusive;
+}
+
/*
* ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
*/
@@ -2186,8 +2217,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
* recycled and reused for an unrelated tuple. This implies that
* the latest version of the row was deleted, so we need do
* nothing. (Should be safe to examine xmin without getting
- * buffer's content lock, since xmin never changes in an existing
- * tuple.)
+ * buffer's content lock. We assume reading a TransactionId to be
+ * atomic, and Xmin never changes in an existing tuple, except to
+ * invalid or frozen, and neither of those can match priorXmax.)
*/
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
priorXmax))
@@ -2268,11 +2300,12 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
* case, so as to avoid the "Halloween problem" of
* repeated update attempts. In the latter case it might
* be sensible to fetch the updated tuple instead, but
- * doing so would require changing heap_lock_tuple as well
- * as heap_update and heap_delete to not complain about
- * updating "invisible" tuples, which seems pretty scary.
- * So for now, treat the tuple as deleted and do not
- * process.
+ * doing so would require changing heap_update and
+ * heap_delete to not complain about updating "invisible"
+ * tuples, which seems pretty scary (heap_lock_tuple will
+ * not complain, but few callers expect HeapTupleInvisible,
+ * and we're not one of them). So for now, treat the tuple
+ * as deleted and do not process.
*/
ReleaseBuffer(buffer);
return NULL;
@@ -2287,6 +2320,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
+
+ /* Should not encounter speculative tuple on recheck */
+ Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
{
/* it was updated, so look at the updated version */
@@ -2302,6 +2338,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
ReleaseBuffer(buffer);
return NULL;
+ case HeapTupleInvisible:
+ elog(ERROR, "attempted to lock invisible tuple");
+
default:
ReleaseBuffer(buffer);
elog(ERROR, "unrecognized heap_lock_tuple status: %u",
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index bb6df47a95..5ae106c06a 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -152,10 +152,11 @@ lnext:
* case, so as to avoid the "Halloween problem" of repeated
* update attempts. In the latter case it might be sensible
* to fetch the updated tuple instead, but doing so would
- * require changing heap_lock_tuple as well as heap_update and
- * heap_delete to not complain about updating "invisible"
- * tuples, which seems pretty scary. So for now, treat the
- * tuple as deleted and do not process.
+ * require changing heap_update and heap_delete to not complain
+ * about updating "invisible" tuples, which seems pretty scary
+ * (heap_lock_tuple will not complain, but few callers expect
+ * HeapTupleInvisible, and we're not one of them). So for now,
+ * treat the tuple as deleted and do not process.
*/
goto lnext;
@@ -228,6 +229,9 @@ lnext:
/* Continue loop until we have all target tuples */
break;
+ case HeapTupleInvisible:
+ elog(ERROR, "attempted to lock invisible tuple");
+
default:
elog(ERROR, "unrecognized heap_lock_tuple status: %u",
test);
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 31666edfa8..34435c7e50 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -46,12 +46,22 @@
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/tqual.h"
+static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
+ ResultRelInfo *resultRelInfo,
+ ItemPointer conflictTid,
+ TupleTableSlot *planSlot,
+ TupleTableSlot *excludedSlot,
+ EState *estate,
+ bool canSetTag,
+ TupleTableSlot **returning);
+
/*
* Verify that the tuples to be produced by INSERT or UPDATE match the
* target relation's rowtype
@@ -151,6 +161,51 @@ ExecProcessReturning(ProjectionInfo *projectReturning,
return ExecProject(projectReturning, NULL);
}
+/*
+ * ExecCheckHeapTupleVisible -- verify heap tuple is visible
+ *
+ * It would not be consistent with guarantees of the higher isolation levels to
+ * proceed with avoiding insertion (taking speculative insertion's alternative
+ * path) on the basis of another tuple that is not visible to MVCC snapshot.
+ * Check for the need to raise a serialization failure, and do so as necessary.
+ */
+static void
+ExecCheckHeapTupleVisible(EState *estate,
+ HeapTuple tuple,
+ Buffer buffer)
+{
+ if (!IsolationUsesXactSnapshot())
+ return;
+
+ if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+}
+
+/*
+ * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible()
+ */
+static void
+ExecCheckTIDVisible(EState *estate,
+ ResultRelInfo *relinfo,
+ ItemPointer tid)
+{
+ Relation rel = relinfo->ri_RelationDesc;
+ Buffer buffer;
+ HeapTupleData tuple;
+
+ /* Redundantly check isolation level */
+ if (!IsolationUsesXactSnapshot())
+ return;
+
+ tuple.t_self = *tid;
+ if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
+ elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
+ ExecCheckHeapTupleVisible(estate, &tuple, buffer);
+ ReleaseBuffer(buffer);
+}
+
/* ----------------------------------------------------------------
* ExecInsert
*
@@ -161,8 +216,11 @@ ExecProcessReturning(ProjectionInfo *projectReturning,
* ----------------------------------------------------------------
*/
static TupleTableSlot *
-ExecInsert(TupleTableSlot *slot,
+ExecInsert(ModifyTableState *mtstate,
+ TupleTableSlot *slot,
TupleTableSlot *planSlot,
+ List *arbiterIndexes,
+ OnConflictAction onconflict,
EState *estate,
bool canSetTag)
{
@@ -199,7 +257,15 @@ ExecInsert(TupleTableSlot *slot,
if (resultRelationDesc->rd_rel->relhasoids)
HeapTupleSetOid(tuple, InvalidOid);
- /* BEFORE ROW INSERT Triggers */
+ /*
+ * BEFORE ROW INSERT Triggers.
+ *
+ * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
+ * INSERT ... ON CONFLICT statement. We cannot check for constraint
+ * violations before firing these triggers, because they can change the
+ * values to insert. Also, they can run arbitrary user-defined code with
+ * side-effects that we can't cancel by just not inserting the tuple.
+ */
if (resultRelInfo->ri_TrigDesc &&
resultRelInfo->ri_TrigDesc->trig_insert_before_row)
{
@@ -268,21 +334,132 @@ ExecInsert(TupleTableSlot *slot,
if (resultRelationDesc->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
- /*
- * insert the tuple
- *
- * Note: heap_insert returns the tid (location) of the new tuple in
- * the t_self field.
- */
- newId = heap_insert(resultRelationDesc, tuple,
- estate->es_output_cid, 0, NULL);
+ if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
+ {
+ /* Perform a speculative insertion. */
+ uint32 specToken;
+ ItemPointerData conflictTid;
+ bool specConflict;
- /*
- * insert index entries for tuple
- */
- if (resultRelInfo->ri_NumIndices > 0)
+ /*
+ * Do a non-conclusive check for conflicts first.
+ *
+ * We're not holding any locks yet, so this doesn't guarantee that
+ * the later insert won't conflict. But it avoids leaving behind
+ * a lot of canceled speculative insertions, if you run a lot of
+ * INSERT ON CONFLICT statements that do conflict.
+ *
+ * We loop back here if we find a conflict below, either during
+ * the pre-check, or when we re-check after inserting the tuple
+ * speculatively. See the executor README for a full discussion
+ * of speculative insertion.
+ */
+ vlock:
+ specConflict = false;
+ if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
+ arbiterIndexes))
+ {
+ /* committed conflict tuple found */
+ if (onconflict == ONCONFLICT_UPDATE)
+ {
+ /*
+ * In case of ON CONFLICT DO UPDATE, execute the UPDATE
+ * part. Be prepared to retry if the UPDATE fails because
+ * of another concurrent UPDATE/DELETE to the conflict
+ * tuple.
+ */
+ TupleTableSlot *returning = NULL;
+
+ if (ExecOnConflictUpdate(mtstate, resultRelInfo,
+ &conflictTid, planSlot, slot,
+ estate, canSetTag, &returning))
+ {
+ InstrCountFiltered2(&mtstate->ps, 1);
+ return returning;
+ }
+ else
+ goto vlock;
+ }
+ else
+ {
+ /*
+ * In case of ON CONFLICT DO NOTHING, do nothing.
+ * However, verify that the tuple is visible to the
+ * executor's MVCC snapshot at higher isolation levels.
+ */
+ Assert(onconflict == ONCONFLICT_NOTHING);
+ ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
+ InstrCountFiltered2(&mtstate->ps, 1);
+ return NULL;
+ }
+ }
+
+ /*
+ * Before we start insertion proper, acquire our "speculative
+ * insertion lock". Others can use that to wait for us to decide
+ * if we're going to go ahead with the insertion, instead of
+ * waiting for the whole transaction to complete.
+ */
+ specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+ HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
+
+ /* insert the tuple, with the speculative token */
+ newId = heap_insert(resultRelationDesc, tuple,
+ estate->es_output_cid,
+ HEAP_INSERT_SPECULATIVE,
+ NULL);
+
+ /* insert index entries for tuple */
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate);
+ estate, true, &specConflict,
+ arbiterIndexes);
+
+ /* adjust the tuple's state accordingly */
+ if (!specConflict)
+ heap_finish_speculative(resultRelationDesc, tuple);
+ else
+ heap_abort_speculative(resultRelationDesc, tuple);
+
+ /*
+ * Wake up anyone waiting for our decision. They will re-check
+ * the tuple, see that it's no longer speculative, and wait on our
+ * XID as if this was a regularly inserted tuple all along. Or if
+ * we killed the tuple, they will see it's dead, and proceed as if
+ * the tuple never existed.
+ */
+ SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+ /*
+ * If there was a conflict, start from the beginning. We'll do
+ * the pre-check again, which will now find the conflicting tuple
+ * (unless it aborts before we get there).
+ */
+ if (specConflict)
+ {
+ list_free(recheckIndexes);
+ goto vlock;
+ }
+
+ /* Since there was no insertion conflict, we're done */
+ }
+ else
+ {
+ /*
+ * insert the tuple normally.
+ *
+ * Note: heap_insert returns the tid (location) of the new tuple
+ * in the t_self field.
+ */
+ newId = heap_insert(resultRelationDesc, tuple,
+ estate->es_output_cid,
+ 0, NULL);
+
+ /* insert index entries for tuple */
+ if (resultRelInfo->ri_NumIndices > 0)
+ recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+ estate, false, NULL,
+ arbiterIndexes);
+ }
}
if (canSetTag)
@@ -800,7 +977,7 @@ lreplace:;
*/
if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
- estate);
+ estate, false, NULL, NIL);
}
if (canSetTag)
@@ -832,6 +1009,190 @@ lreplace:;
return NULL;
}
+/*
+ * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
+ *
+ * Try to lock tuple for update as part of speculative insertion. If
+ * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
+ * (but still lock row, even though it may not satisfy estate's
+ * snapshot).
+ *
+ * Returns true if if we're done (with or without an update), or false if
+ * the caller must retry the INSERT from scratch.
+ */
+static bool
+ExecOnConflictUpdate(ModifyTableState *mtstate,
+ ResultRelInfo *resultRelInfo,
+ ItemPointer conflictTid,
+ TupleTableSlot *planSlot,
+ TupleTableSlot *excludedSlot,
+ EState *estate,
+ bool canSetTag,
+ TupleTableSlot **returning)
+{
+ ExprContext *econtext = mtstate->ps.ps_ExprContext;
+ Relation relation = resultRelInfo->ri_RelationDesc;
+ List *onConflictSetWhere = resultRelInfo->ri_onConflictSetWhere;
+ HeapTupleData tuple;
+ HeapUpdateFailureData hufd;
+ LockTupleMode lockmode;
+ HTSU_Result test;
+ Buffer buffer;
+
+ /* Determine lock mode to use */
+ lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+
+ /*
+ * Lock tuple for update. Don't follow updates when tuple cannot be
+ * locked without doing so. A row locking conflict here means our
+ * previous conclusion that the tuple is conclusively committed is not
+ * true anymore.
+ */
+ tuple.t_self = *conflictTid;
+ test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
+ lockmode, LockWaitBlock, false, &buffer,
+ &hufd);
+ switch (test)
+ {
+ case HeapTupleMayBeUpdated:
+ /* success! */
+ break;
+
+ case HeapTupleInvisible:
+
+ /*
+ * This can occur when a just inserted tuple is updated again in
+ * the same command. E.g. because multiple rows with the same
+ * conflicting key values are inserted.
+ *
+ * This is somewhat similar to the ExecUpdate()
+ * HeapTupleSelfUpdated case. We do not want to proceed because
+ * it would lead to the same row being updated a second time in
+ * some unspecified order, and in contrast to plain UPDATEs
+ * there's no historical behavior to break.
+ *
+ * It is the user's responsibility to prevent this situation from
+ * occurring. These problems are why SQL-2003 similarly specifies
+ * that for SQL MERGE, an exception must be raised in the event of
+ * an attempt to update the same row twice.
+ */
+ if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
+ ereport(ERROR,
+ (errcode(ERRCODE_CARDINALITY_VIOLATION),
+ errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
+ errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
+
+ /* This shouldn't happen */
+ elog(ERROR, "attempted to lock invisible tuple");
+
+ case HeapTupleSelfUpdated:
+
+ /*
+ * This state should never be reached. As a dirty snapshot is used
+ * to find conflicting tuples, speculative insertion wouldn't have
+ * seen this row to conflict with.
+ */
+ elog(ERROR, "unexpected self-updated tuple");
+
+ case HeapTupleUpdated:
+ if (IsolationUsesXactSnapshot())
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("could not serialize access due to concurrent update")));
+
+ /*
+ * Tell caller to try again from the very start.
+ *
+ * It does not make sense to use the usual EvalPlanQual() style
+ * loop here, as the new version of the row might not conflict
+ * anymore, or the conflicting tuple has actually been deleted.
+ */
+ ReleaseBuffer(buffer);
+ return false;
+
+ default:
+ elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
+ }
+
+ /*
+ * Success, the tuple is locked.
+ *
+ * Reset per-tuple memory context to free any expression evaluation
+ * storage allocated in the previous cycle.
+ */
+ ResetExprContext(econtext);
+
+ /*
+ * Verify that the tuple is visible to our MVCC snapshot if the current
+ * isolation level mandates that.
+ *
+ * It's not sufficient to rely on the check within ExecUpdate() as e.g.
+ * CONFLICT ... WHERE clause may prevent us from reaching that.
+ *
+ * This means we only ever continue when a new command in the current
+ * transaction could see the row, even though in READ COMMITTED mode the
+ * tuple will not be visible according to the current statement's
+ * snapshot. This is in line with the way UPDATE deals with newer tuple
+ * versions.
+ */
+ ExecCheckHeapTupleVisible(estate, &tuple, buffer);
+
+ /* Store target's existing tuple in the state's dedicated slot */
+ ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false);
+
+ /*
+ * Make tuple and any needed join variables available to ExecQual and
+ * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while
+ * the target's existing tuple is installed in the scantuple. EXCLUDED has
+ * been made to reference INNER_VAR in setrefs.c, but there is no other
+ * redirection.
+ */
+ econtext->ecxt_scantuple = mtstate->mt_existing;
+ econtext->ecxt_innertuple = excludedSlot;
+ econtext->ecxt_outertuple = NULL;
+
+ if (!ExecQual(onConflictSetWhere, econtext, false))
+ {
+ ReleaseBuffer(buffer);
+ InstrCountFiltered1(&mtstate->ps, 1);
+ return true; /* done with the tuple */
+ }
+
+ if (resultRelInfo->ri_WithCheckOptions != NIL)
+ {
+ /*
+ * Check target's existing tuple against UPDATE-applicable USING
+ * security barrier quals (if any), enforced here as RLS checks/WCOs.
+ *
+ * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
+ * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
+ * but that's almost the extent of its special handling for ON
+ * CONFLICT DO UPDATE.
+ *
+ * The rewriter will also have associated UPDATE applicable straight
+ * RLS checks/WCOs for the benefit of the ExecUpdate() call that
+ * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO
+ * kinds, so there is no danger of spurious over-enforcement in the
+ * INSERT or UPDATE path.
+ */
+ ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
+ mtstate->mt_existing,
+ mtstate->ps.state);
+ }
+
+ /* Project the new tuple version */
+ ExecProject(resultRelInfo->ri_onConflictSetProj, NULL);
+
+ /* Execute UPDATE with projection */
+ *returning = ExecUpdate(&tuple.t_data->t_ctid, NULL,
+ mtstate->mt_conflproj, planSlot,
+ &mtstate->mt_epqstate, mtstate->ps.state,
+ canSetTag);
+
+ ReleaseBuffer(buffer);
+ return true;
+}
+
/*
* Process BEFORE EACH STATEMENT triggers
@@ -843,6 +1204,9 @@ fireBSTriggers(ModifyTableState *node)
{
case CMD_INSERT:
ExecBSInsertTriggers(node->ps.state, node->resultRelInfo);
+ if (node->mt_onconflict == ONCONFLICT_UPDATE)
+ ExecBSUpdateTriggers(node->ps.state,
+ node->resultRelInfo);
break;
case CMD_UPDATE:
ExecBSUpdateTriggers(node->ps.state, node->resultRelInfo);
@@ -865,6 +1229,9 @@ fireASTriggers(ModifyTableState *node)
switch (node->operation)
{
case CMD_INSERT:
+ if (node->mt_onconflict == ONCONFLICT_UPDATE)
+ ExecASUpdateTriggers(node->ps.state,
+ node->resultRelInfo);
ExecASInsertTriggers(node->ps.state, node->resultRelInfo);
break;
case CMD_UPDATE:
@@ -1062,7 +1429,9 @@ ExecModifyTable(ModifyTableState *node)
switch (operation)
{
case CMD_INSERT:
- slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
+ slot = ExecInsert(node, slot, planSlot,
+ node->mt_arbiterindexes, node->mt_onconflict,
+ estate, node->canSetTag);
break;
case CMD_UPDATE:
slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
@@ -1137,6 +1506,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
mtstate->mt_nplans = nplans;
+ mtstate->mt_onconflict = node->onConflictAction;
+ mtstate->mt_arbiterindexes = node->arbiterIndexes;
/* set up epqstate with dummy subplan data for the moment */
EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
@@ -1175,7 +1546,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
operation != CMD_DELETE &&
resultRelInfo->ri_IndexRelationDescs == NULL)
- ExecOpenIndices(resultRelInfo);
+ ExecOpenIndices(resultRelInfo, mtstate->mt_onconflict != ONCONFLICT_NONE);
/* Now init the plan for this result rel */
estate->es_result_relation_info = resultRelInfo;
@@ -1279,6 +1650,58 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->ps.ps_ExprContext = NULL;
}
+ /*
+ * If needed, Initialize target list, projection and qual for ON CONFLICT
+ * DO UPDATE.
+ */
+ resultRelInfo = mtstate->resultRelInfo;
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ {
+ ExprContext *econtext;
+ ExprState *setexpr;
+ TupleDesc tupDesc;
+
+ /* insert may only have one plan, inheritance is not expanded */
+ Assert(nplans == 1);
+
+ /* already exists if created by RETURNING processing above */
+ if (mtstate->ps.ps_ExprContext == NULL)
+ ExecAssignExprContext(estate, &mtstate->ps);
+
+ econtext = mtstate->ps.ps_ExprContext;
+
+ /* initialize slot for the existing tuple */
+ mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state);
+ ExecSetSlotDescriptor(mtstate->mt_existing,
+ resultRelInfo->ri_RelationDesc->rd_att);
+
+ mtstate->mt_excludedtlist = node->exclRelTlist;
+
+ /* create target slot for UPDATE SET projection */
+ tupDesc = ExecTypeFromTL((List *) node->onConflictSet,
+ false);
+ mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state);
+ ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
+
+ /* build UPDATE SET expression and projection state */
+ setexpr = ExecInitExpr((Expr *) node->onConflictSet, &mtstate->ps);
+ resultRelInfo->ri_onConflictSetProj =
+ ExecBuildProjectionInfo((List *) setexpr, econtext,
+ mtstate->mt_conflproj,
+ resultRelInfo->ri_RelationDesc->rd_att);
+
+ /* build DO UPDATE WHERE clause expression */
+ if (node->onConflictWhere)
+ {
+ ExprState *qualexpr;
+
+ qualexpr = ExecInitExpr((Expr *) node->onConflictWhere,
+ mtstate->mt_plans[0]);
+
+ resultRelInfo->ri_onConflictSetWhere = (List *) qualexpr;
+ }
+ }
+
/*
* If we have any secondary relations in an UPDATE or DELETE, they need to
* be treated like non-locked relations in SELECT FOR UPDATE, ie, the
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 1b02be287c..a3139d3eb5 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -81,6 +81,7 @@ _copyPlannedStmt(const PlannedStmt *from)
COPY_SCALAR_FIELD(queryId);
COPY_SCALAR_FIELD(hasReturning);
COPY_SCALAR_FIELD(hasModifyingCTE);
+ COPY_SCALAR_FIELD(isUpsert);
COPY_SCALAR_FIELD(canSetTag);
COPY_SCALAR_FIELD(transientPlan);
COPY_NODE_FIELD(planTree);
@@ -185,6 +186,12 @@ _copyModifyTable(const ModifyTable *from)
COPY_NODE_FIELD(fdwPrivLists);
COPY_NODE_FIELD(rowMarks);
COPY_SCALAR_FIELD(epqParam);
+ COPY_SCALAR_FIELD(onConflictAction);
+ COPY_NODE_FIELD(arbiterIndexes);
+ COPY_NODE_FIELD(onConflictSet);
+ COPY_NODE_FIELD(onConflictWhere);
+ COPY_SCALAR_FIELD(exclRelRTI);
+ COPY_NODE_FIELD(exclRelTlist);
return newnode;
}
@@ -1786,6 +1793,22 @@ _copyCurrentOfExpr(const CurrentOfExpr *from)
return newnode;
}
+/*
+ * _copyInferenceElem
+ */
+static InferenceElem *
+_copyInferenceElem(const InferenceElem *from)
+{
+ InferenceElem *newnode = makeNode(InferenceElem);
+
+ COPY_NODE_FIELD(expr);
+ COPY_SCALAR_FIELD(infercollid);
+ COPY_SCALAR_FIELD(inferopfamily);
+ COPY_SCALAR_FIELD(inferopcinputtype);
+
+ return newnode;
+}
+
/*
* _copyTargetEntry
*/
@@ -1852,6 +1875,26 @@ _copyFromExpr(const FromExpr *from)
return newnode;
}
+/*
+ * _copyOnConflictExpr
+ */
+static OnConflictExpr *
+_copyOnConflictExpr(const OnConflictExpr *from)
+{
+ OnConflictExpr *newnode = makeNode(OnConflictExpr);
+
+ COPY_SCALAR_FIELD(action);
+ COPY_NODE_FIELD(arbiterElems);
+ COPY_NODE_FIELD(arbiterWhere);
+ COPY_NODE_FIELD(onConflictSet);
+ COPY_NODE_FIELD(onConflictWhere);
+ COPY_SCALAR_FIELD(constraint);
+ COPY_SCALAR_FIELD(exclRelIndex);
+ COPY_NODE_FIELD(exclRelTlist);
+
+ return newnode;
+}
+
/* ****************************************************************
* relation.h copy functions
*
@@ -2135,6 +2178,33 @@ _copyWithClause(const WithClause *from)
return newnode;
}
+static InferClause *
+_copyInferClause(const InferClause *from)
+{
+ InferClause *newnode = makeNode(InferClause);
+
+ COPY_NODE_FIELD(indexElems);
+ COPY_NODE_FIELD(whereClause);
+ COPY_STRING_FIELD(conname);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
+static OnConflictClause *
+_copyOnConflictClause(const OnConflictClause *from)
+{
+ OnConflictClause *newnode = makeNode(OnConflictClause);
+
+ COPY_SCALAR_FIELD(action);
+ COPY_NODE_FIELD(infer);
+ COPY_NODE_FIELD(targetList);
+ COPY_NODE_FIELD(whereClause);
+ COPY_LOCATION_FIELD(location);
+
+ return newnode;
+}
+
static CommonTableExpr *
_copyCommonTableExpr(const CommonTableExpr *from)
{
@@ -2552,6 +2622,7 @@ _copyQuery(const Query *from)
COPY_NODE_FIELD(jointree);
COPY_NODE_FIELD(targetList);
COPY_NODE_FIELD(withCheckOptions);
+ COPY_NODE_FIELD(onConflict);
COPY_NODE_FIELD(returningList);
COPY_NODE_FIELD(groupClause);
COPY_NODE_FIELD(havingQual);
@@ -2575,6 +2646,7 @@ _copyInsertStmt(const InsertStmt *from)
COPY_NODE_FIELD(relation);
COPY_NODE_FIELD(cols);
COPY_NODE_FIELD(selectStmt);
+ COPY_NODE_FIELD(onConflictClause);
COPY_NODE_FIELD(returningList);
COPY_NODE_FIELD(withClause);
@@ -4283,6 +4355,9 @@ copyObject(const void *from)
case T_CurrentOfExpr:
retval = _copyCurrentOfExpr(from);
break;
+ case T_InferenceElem:
+ retval = _copyInferenceElem(from);
+ break;
case T_TargetEntry:
retval = _copyTargetEntry(from);
break;
@@ -4295,6 +4370,9 @@ copyObject(const void *from)
case T_FromExpr:
retval = _copyFromExpr(from);
break;
+ case T_OnConflictExpr:
+ retval = _copyOnConflictExpr(from);
+ break;
/*
* RELATION NODES
@@ -4753,6 +4831,12 @@ copyObject(const void *from)
case T_WithClause:
retval = _copyWithClause(from);
break;
+ case T_InferClause:
+ retval = _copyInferClause(from);
+ break;
+ case T_OnConflictClause:
+ retval = _copyOnConflictClause(from);
+ break;
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 1b9a83b93e..7c86e919a4 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -682,6 +682,17 @@ _equalCurrentOfExpr(const CurrentOfExpr *a, const CurrentOfExpr *b)
return true;
}
+static bool
+_equalInferenceElem(const InferenceElem *a, const InferenceElem *b)
+{
+ COMPARE_NODE_FIELD(expr);
+ COMPARE_SCALAR_FIELD(infercollid);
+ COMPARE_SCALAR_FIELD(inferopfamily);
+ COMPARE_SCALAR_FIELD(inferopcinputtype);
+
+ return true;
+}
+
static bool
_equalTargetEntry(const TargetEntry *a, const TargetEntry *b)
{
@@ -728,6 +739,20 @@ _equalFromExpr(const FromExpr *a, const FromExpr *b)
return true;
}
+static bool
+_equalOnConflictExpr(const OnConflictExpr *a, const OnConflictExpr *b)
+{
+ COMPARE_SCALAR_FIELD(action);
+ COMPARE_NODE_FIELD(arbiterElems);
+ COMPARE_NODE_FIELD(arbiterWhere);
+ COMPARE_NODE_FIELD(onConflictSet);
+ COMPARE_NODE_FIELD(onConflictWhere);
+ COMPARE_SCALAR_FIELD(constraint);
+ COMPARE_SCALAR_FIELD(exclRelIndex);
+ COMPARE_NODE_FIELD(exclRelTlist);
+
+ return true;
+}
/*
* Stuff from relation.h
@@ -868,6 +893,7 @@ _equalQuery(const Query *a, const Query *b)
COMPARE_NODE_FIELD(jointree);
COMPARE_NODE_FIELD(targetList);
COMPARE_NODE_FIELD(withCheckOptions);
+ COMPARE_NODE_FIELD(onConflict);
COMPARE_NODE_FIELD(returningList);
COMPARE_NODE_FIELD(groupClause);
COMPARE_NODE_FIELD(havingQual);
@@ -889,6 +915,7 @@ _equalInsertStmt(const InsertStmt *a, const InsertStmt *b)
COMPARE_NODE_FIELD(relation);
COMPARE_NODE_FIELD(cols);
COMPARE_NODE_FIELD(selectStmt);
+ COMPARE_NODE_FIELD(onConflictClause);
COMPARE_NODE_FIELD(returningList);
COMPARE_NODE_FIELD(withClause);
@@ -2433,6 +2460,29 @@ _equalWithClause(const WithClause *a, const WithClause *b)
return true;
}
+static bool
+_equalInferClause(const InferClause *a, const InferClause *b)
+{
+ COMPARE_NODE_FIELD(indexElems);
+ COMPARE_NODE_FIELD(whereClause);
+ COMPARE_STRING_FIELD(conname);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
+static bool
+_equalOnConflictClause(const OnConflictClause *a, const OnConflictClause *b)
+{
+ COMPARE_SCALAR_FIELD(action);
+ COMPARE_NODE_FIELD(infer);
+ COMPARE_NODE_FIELD(targetList);
+ COMPARE_NODE_FIELD(whereClause);
+ COMPARE_LOCATION_FIELD(location);
+
+ return true;
+}
+
static bool
_equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
{
@@ -2712,6 +2762,9 @@ equal(const void *a, const void *b)
case T_CurrentOfExpr:
retval = _equalCurrentOfExpr(a, b);
break;
+ case T_InferenceElem:
+ retval = _equalInferenceElem(a, b);
+ break;
case T_TargetEntry:
retval = _equalTargetEntry(a, b);
break;
@@ -2721,6 +2774,9 @@ equal(const void *a, const void *b)
case T_FromExpr:
retval = _equalFromExpr(a, b);
break;
+ case T_OnConflictExpr:
+ retval = _equalOnConflictExpr(a, b);
+ break;
case T_JoinExpr:
retval = _equalJoinExpr(a, b);
break;
@@ -3169,6 +3225,12 @@ equal(const void *a, const void *b)
case T_WithClause:
retval = _equalWithClause(a, b);
break;
+ case T_InferClause:
+ retval = _equalInferClause(a, b);
+ break;
+ case T_OnConflictClause:
+ retval = _equalOnConflictClause(a, b);
+ break;
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index d6f1f5bb6d..4135f9c3cf 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -235,6 +235,13 @@ exprType(const Node *expr)
case T_CurrentOfExpr:
type = BOOLOID;
break;
+ case T_InferenceElem:
+ {
+ const InferenceElem *n = (const InferenceElem *) expr;
+
+ type = exprType((Node *) n->expr);
+ }
+ break;
case T_PlaceHolderVar:
type = exprType((Node *) ((const PlaceHolderVar *) expr)->phexpr);
break;
@@ -894,6 +901,9 @@ exprCollation(const Node *expr)
case T_CurrentOfExpr:
coll = InvalidOid; /* result is always boolean */
break;
+ case T_InferenceElem:
+ coll = exprCollation((Node *) ((const InferenceElem *) expr)->expr);
+ break;
case T_PlaceHolderVar:
coll = exprCollation((Node *) ((const PlaceHolderVar *) expr)->phexpr);
break;
@@ -1484,6 +1494,12 @@ exprLocation(const Node *expr)
case T_WithClause:
loc = ((const WithClause *) expr)->location;
break;
+ case T_InferClause:
+ loc = ((const InferClause *) expr)->location;
+ break;
+ case T_OnConflictClause:
+ loc = ((const OnConflictClause *) expr)->location;
+ break;
case T_CommonTableExpr:
loc = ((const CommonTableExpr *) expr)->location;
break;
@@ -1491,6 +1507,10 @@ exprLocation(const Node *expr)
/* just use argument's location */
loc = exprLocation((Node *) ((const PlaceHolderVar *) expr)->phexpr);
break;
+ case T_InferenceElem:
+ /* just use nested expr's location */
+ loc = exprLocation((Node *) ((const InferenceElem *) expr)->expr);
+ break;
default:
/* for any other node type it's just unknown... */
loc = -1;
@@ -1890,6 +1910,20 @@ expression_tree_walker(Node *node,
return true;
}
break;
+ case T_OnConflictExpr:
+ {
+ OnConflictExpr *onconflict = (OnConflictExpr *) node;
+
+ if (walker((Node *) onconflict->arbiterElems, context))
+ return true;
+ if (walker(onconflict->arbiterWhere, context))
+ return true;
+ if (walker(onconflict->onConflictSet, context))
+ return true;
+ if (walker(onconflict->onConflictWhere, context))
+ return true;
+ }
+ break;
case T_JoinExpr:
{
JoinExpr *join = (JoinExpr *) node;
@@ -1920,6 +1954,8 @@ expression_tree_walker(Node *node,
break;
case T_PlaceHolderVar:
return walker(((PlaceHolderVar *) node)->phexpr, context);
+ case T_InferenceElem:
+ return walker(((InferenceElem *) node)->expr, context);
case T_AppendRelInfo:
{
AppendRelInfo *appinfo = (AppendRelInfo *) node;
@@ -1968,6 +2004,8 @@ query_tree_walker(Query *query,
return true;
if (walker((Node *) query->withCheckOptions, context))
return true;
+ if (walker((Node *) query->onConflict, context))
+ return true;
if (walker((Node *) query->returningList, context))
return true;
if (walker((Node *) query->jointree, context))
@@ -2594,6 +2632,20 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
+ case T_OnConflictExpr:
+ {
+ OnConflictExpr *oc = (OnConflictExpr *) node;
+ OnConflictExpr *newnode;
+
+ FLATCOPY(newnode, oc, OnConflictExpr);
+ MUTATE(newnode->arbiterElems, oc->arbiterElems, List *);
+ MUTATE(newnode->arbiterWhere, oc->arbiterWhere, Node *);
+ MUTATE(newnode->onConflictSet, oc->onConflictSet, List *);
+ MUTATE(newnode->onConflictWhere, oc->onConflictWhere, Node *);
+
+ return (Node *) newnode;
+ }
+ break;
case T_JoinExpr:
{
JoinExpr *join = (JoinExpr *) node;
@@ -2630,6 +2682,16 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
+ case T_InferenceElem:
+ {
+ InferenceElem *inferenceelemdexpr = (InferenceElem *) node;
+ InferenceElem *newnode;
+
+ FLATCOPY(newnode, inferenceelemdexpr, InferenceElem);
+ MUTATE(newnode->expr, newnode->expr, Node *);
+ return (Node *) newnode;
+ }
+ break;
case T_AppendRelInfo:
{
AppendRelInfo *appinfo = (AppendRelInfo *) node;
@@ -2709,6 +2771,7 @@ query_tree_mutator(Query *query,
MUTATE(query->targetList, query->targetList, List *);
MUTATE(query->withCheckOptions, query->withCheckOptions, List *);
+ MUTATE(query->onConflict, query->onConflict, OnConflictExpr *);
MUTATE(query->returningList, query->returningList, List *);
MUTATE(query->jointree, query->jointree, FromExpr *);
MUTATE(query->setOperations, query->setOperations, Node *);
@@ -2978,6 +3041,8 @@ raw_expression_tree_walker(Node *node,
return true;
if (walker(stmt->selectStmt, context))
return true;
+ if (walker(stmt->onConflictClause, context))
+ return true;
if (walker(stmt->returningList, context))
return true;
if (walker(stmt->withClause, context))
@@ -3217,6 +3282,28 @@ raw_expression_tree_walker(Node *node,
break;
case T_WithClause:
return walker(((WithClause *) node)->ctes, context);
+ case T_InferClause:
+ {
+ InferClause *stmt = (InferClause *) node;
+
+ if (walker(stmt->indexElems, context))
+ return true;
+ if (walker(stmt->whereClause, context))
+ return true;
+ }
+ break;
+ case T_OnConflictClause:
+ {
+ OnConflictClause *stmt = (OnConflictClause *) node;
+
+ if (walker(stmt->infer, context))
+ return true;
+ if (walker(stmt->targetList, context))
+ return true;
+ if (walker(stmt->whereClause, context))
+ return true;
+ }
+ break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
default:
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index d5ddd0b359..bc891d391f 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -243,6 +243,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node)
WRITE_UINT_FIELD(queryId);
WRITE_BOOL_FIELD(hasReturning);
WRITE_BOOL_FIELD(hasModifyingCTE);
+ WRITE_BOOL_FIELD(isUpsert);
WRITE_BOOL_FIELD(canSetTag);
WRITE_BOOL_FIELD(transientPlan);
WRITE_NODE_FIELD(planTree);
@@ -337,6 +338,12 @@ _outModifyTable(StringInfo str, const ModifyTable *node)
WRITE_NODE_FIELD(fdwPrivLists);
WRITE_NODE_FIELD(rowMarks);
WRITE_INT_FIELD(epqParam);
+ WRITE_ENUM_FIELD(onConflictAction, OnConflictAction);
+ WRITE_NODE_FIELD(arbiterIndexes);
+ WRITE_NODE_FIELD(onConflictSet);
+ WRITE_NODE_FIELD(onConflictWhere);
+ WRITE_INT_FIELD(exclRelRTI);
+ WRITE_NODE_FIELD(exclRelTlist);
}
static void
@@ -1436,6 +1443,17 @@ _outCurrentOfExpr(StringInfo str, const CurrentOfExpr *node)
WRITE_INT_FIELD(cursor_param);
}
+static void
+_outInferenceElem(StringInfo str, const InferenceElem *node)
+{
+ WRITE_NODE_TYPE("INFERENCEELEM");
+
+ WRITE_NODE_FIELD(expr);
+ WRITE_OID_FIELD(infercollid);
+ WRITE_OID_FIELD(inferopfamily);
+ WRITE_OID_FIELD(inferopcinputtype);
+}
+
static void
_outTargetEntry(StringInfo str, const TargetEntry *node)
{
@@ -1482,6 +1500,21 @@ _outFromExpr(StringInfo str, const FromExpr *node)
WRITE_NODE_FIELD(quals);
}
+static void
+_outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
+{
+ WRITE_NODE_TYPE("ONCONFLICTEXPR");
+
+ WRITE_ENUM_FIELD(action, OnConflictAction);
+ WRITE_NODE_FIELD(arbiterElems);
+ WRITE_NODE_FIELD(arbiterWhere);
+ WRITE_NODE_FIELD(onConflictSet);
+ WRITE_NODE_FIELD(onConflictWhere);
+ WRITE_OID_FIELD(constraint);
+ WRITE_INT_FIELD(exclRelIndex);
+ WRITE_NODE_FIELD(exclRelTlist);
+}
+
/*****************************************************************************
*
* Stuff from relation.h.
@@ -2319,6 +2352,7 @@ _outQuery(StringInfo str, const Query *node)
WRITE_NODE_FIELD(jointree);
WRITE_NODE_FIELD(targetList);
WRITE_NODE_FIELD(withCheckOptions);
+ WRITE_NODE_FIELD(onConflict);
WRITE_NODE_FIELD(returningList);
WRITE_NODE_FIELD(groupClause);
WRITE_NODE_FIELD(havingQual);
@@ -3112,6 +3146,9 @@ _outNode(StringInfo str, const void *obj)
case T_CurrentOfExpr:
_outCurrentOfExpr(str, obj);
break;
+ case T_InferenceElem:
+ _outInferenceElem(str, obj);
+ break;
case T_TargetEntry:
_outTargetEntry(str, obj);
break;
@@ -3124,7 +3161,9 @@ _outNode(StringInfo str, const void *obj)
case T_FromExpr:
_outFromExpr(str, obj);
break;
-
+ case T_OnConflictExpr:
+ _outOnConflictExpr(str, obj);
+ break;
case T_Path:
_outPath(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index d1ced0cc4b..8136306e1e 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -214,6 +214,7 @@ _readQuery(void)
READ_NODE_FIELD(jointree);
READ_NODE_FIELD(targetList);
READ_NODE_FIELD(withCheckOptions);
+ READ_NODE_FIELD(onConflict);
READ_NODE_FIELD(returningList);
READ_NODE_FIELD(groupClause);
READ_NODE_FIELD(havingQual);
@@ -1130,6 +1131,22 @@ _readCurrentOfExpr(void)
READ_DONE();
}
+/*
+ * _readInferenceElem
+ */
+static InferenceElem *
+_readInferenceElem(void)
+{
+ READ_LOCALS(InferenceElem);
+
+ READ_NODE_FIELD(expr);
+ READ_OID_FIELD(infercollid);
+ READ_OID_FIELD(inferopfamily);
+ READ_OID_FIELD(inferopcinputtype);
+
+ READ_DONE();
+}
+
/*
* _readTargetEntry
*/
@@ -1196,6 +1213,25 @@ _readFromExpr(void)
READ_DONE();
}
+/*
+ * _readOnConflictExpr
+ */
+static OnConflictExpr *
+_readOnConflictExpr(void)
+{
+ READ_LOCALS(OnConflictExpr);
+
+ READ_ENUM_FIELD(action, OnConflictAction);
+ READ_NODE_FIELD(arbiterElems);
+ READ_NODE_FIELD(arbiterWhere);
+ READ_NODE_FIELD(onConflictSet);
+ READ_NODE_FIELD(onConflictWhere);
+ READ_OID_FIELD(constraint);
+ READ_INT_FIELD(exclRelIndex);
+ READ_NODE_FIELD(exclRelTlist);
+
+ READ_DONE();
+}
/*
* Stuff from parsenodes.h.
@@ -1395,6 +1431,8 @@ parseNodeString(void)
return_value = _readSetToDefault();
else if (MATCH("CURRENTOFEXPR", 13))
return_value = _readCurrentOfExpr();
+ else if (MATCH("INFERENCEELEM", 13))
+ return_value = _readInferenceElem();
else if (MATCH("TARGETENTRY", 11))
return_value = _readTargetEntry();
else if (MATCH("RANGETBLREF", 11))
@@ -1403,6 +1441,8 @@ parseNodeString(void)
return_value = _readJoinExpr();
else if (MATCH("FROMEXPR", 8))
return_value = _readFromExpr();
+ else if (MATCH("ONCONFLICTEXPR", 14))
+ return_value = _readOnConflictExpr();
else if (MATCH("RTE", 3))
return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16))
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index eeb2a41764..3246332d6e 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4868,7 +4868,7 @@ make_modifytable(PlannerInfo *root,
Index nominalRelation,
List *resultRelations, List *subplans,
List *withCheckOptionLists, List *returningLists,
- List *rowMarks, int epqParam)
+ List *rowMarks, OnConflictExpr *onconflict, int epqParam)
{
ModifyTable *node = makeNode(ModifyTable);
Plan *plan = &node->plan;
@@ -4918,6 +4918,30 @@ make_modifytable(PlannerInfo *root,
node->resultRelations = resultRelations;
node->resultRelIndex = -1; /* will be set correctly in setrefs.c */
node->plans = subplans;
+ if (!onconflict)
+ {
+ node->onConflictAction = ONCONFLICT_NONE;
+ node->onConflictSet = NIL;
+ node->onConflictWhere = NULL;
+ node->arbiterIndexes = NIL;
+ }
+ else
+ {
+ node->onConflictAction = onconflict->action;
+ node->onConflictSet = onconflict->onConflictSet;
+ node->onConflictWhere = onconflict->onConflictWhere;
+
+ /*
+ * If a set of unique index inference elements was provided (an
+ * INSERT...ON CONFLICT "inference specification"), then infer
+ * appropriate unique indexes (or throw an error if none are
+ * available).
+ */
+ node->arbiterIndexes = infer_arbiter_indexes(root);
+
+ node->exclRelRTI = onconflict->exclRelIndex;
+ node->exclRelTlist = onconflict->exclRelTlist;
+ }
node->withCheckOptionLists = withCheckOptionLists;
node->returningLists = returningLists;
node->rowMarks = rowMarks;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ea4d4c55cb..c80d45acaa 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -243,6 +243,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
result->queryId = parse->queryId;
result->hasReturning = (parse->returningList != NIL);
result->hasModifyingCTE = parse->hasModifyingCTE;
+ result->isUpsert =
+ (parse->onConflict && parse->onConflict->action == ONCONFLICT_UPDATE);
result->canSetTag = parse->canSetTag;
result->transientPlan = glob->transientPlan;
result->planTree = top_plan;
@@ -462,6 +464,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
parse->limitCount = preprocess_expression(root, parse->limitCount,
EXPRKIND_LIMIT);
+ if (parse->onConflict)
+ {
+ parse->onConflict->onConflictSet = (List *)
+ preprocess_expression(root, (Node *) parse->onConflict->onConflictSet,
+ EXPRKIND_TARGET);
+
+ parse->onConflict->onConflictWhere =
+ preprocess_expression(root, (Node *) parse->onConflict->onConflictWhere,
+ EXPRKIND_QUAL);
+ }
+
root->append_rel_list = (List *)
preprocess_expression(root, (Node *) root->append_rel_list,
EXPRKIND_APPINFO);
@@ -612,6 +625,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
withCheckOptionLists,
returningLists,
rowMarks,
+ parse->onConflict,
SS_assign_special_param(root));
}
}
@@ -802,6 +816,8 @@ inheritance_planner(PlannerInfo *root)
List *rowMarks;
ListCell *lc;
+ Assert(parse->commandType != CMD_INSERT);
+
/*
* We generate a modified instance of the original Query for each target
* relation, plan that, and put all the plans into a list that will be
@@ -1046,6 +1062,8 @@ inheritance_planner(PlannerInfo *root)
if (parse->returningList)
returningLists = lappend(returningLists,
subroot.parse->returningList);
+
+ Assert(!parse->onConflict);
}
/* Mark result as unordered (probably unnecessary) */
@@ -1095,6 +1113,7 @@ inheritance_planner(PlannerInfo *root)
withCheckOptionLists,
returningLists,
rowMarks,
+ NULL,
SS_assign_special_param(root));
}
@@ -1228,6 +1247,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
bool use_hashed_grouping = false;
WindowFuncLists *wflists = NULL;
List *activeWindows = NIL;
+ OnConflictExpr *onconfl;
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
@@ -1242,6 +1262,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
/* Preprocess targetlist */
tlist = preprocess_targetlist(root, tlist);
+ onconfl = parse->onConflict;
+ if (onconfl)
+ onconfl->onConflictSet =
+ preprocess_onconflict_targetlist(onconfl->onConflictSet,
+ parse->resultRelation,
+ parse->rtable);
+
/*
* Expand any rangetable entries that have security barrier quals.
* This may add new security barrier subquery RTEs to the rangetable.
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index b7d6ff1122..612d32571a 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -739,7 +739,35 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
splan->plan.targetlist = copyObject(linitial(newRL));
}
+ /*
+ * We treat ModifyTable with ON CONFLICT as a form of 'pseudo
+ * join', where the inner side is the EXLUDED tuple. Therefore
+ * use fix_join_expr to setup the relevant variables to
+ * INNER_VAR. We explicitly don't create any OUTER_VARs as
+ * those are already used by RETURNING and it seems better to
+ * be non-conflicting.
+ */
+ if (splan->onConflictSet)
+ {
+ indexed_tlist *itlist;
+
+ itlist = build_tlist_index(splan->exclRelTlist);
+
+ splan->onConflictSet =
+ fix_join_expr(root, splan->onConflictSet,
+ NULL, itlist,
+ linitial_int(splan->resultRelations),
+ rtoffset);
+
+ splan->onConflictWhere = (Node *)
+ fix_join_expr(root, (List *) splan->onConflictWhere,
+ NULL, itlist,
+ linitial_int(splan->resultRelations),
+ rtoffset);
+ }
+
splan->nominalRelation += rtoffset;
+ splan->exclRelRTI += rtoffset;
foreach(l, splan->resultRelations)
{
@@ -1846,7 +1874,8 @@ search_indexed_tlist_for_sortgroupref(Node *node,
* inner_itlist = NULL and acceptable_rel = the ID of the target relation.
*
* 'clauses' is the targetlist or list of join clauses
- * 'outer_itlist' is the indexed target list of the outer join relation
+ * 'outer_itlist' is the indexed target list of the outer join relation,
+ * or NULL
* 'inner_itlist' is the indexed target list of the inner join relation,
* or NULL
* 'acceptable_rel' is either zero or the rangetable index of a relation
@@ -1886,12 +1915,17 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
Var *var = (Var *) node;
/* First look for the var in the input tlists */
- newvar = search_indexed_tlist_for_var(var,
- context->outer_itlist,
- OUTER_VAR,
- context->rtoffset);
- if (newvar)
- return (Node *) newvar;
+ if (context->outer_itlist)
+ {
+ newvar = search_indexed_tlist_for_var(var,
+ context->outer_itlist,
+ OUTER_VAR,
+ context->rtoffset);
+ if (newvar)
+ return (Node *) newvar;
+ }
+
+ /* Then in the outer */
if (context->inner_itlist)
{
newvar = search_indexed_tlist_for_var(var,
@@ -1920,7 +1954,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
PlaceHolderVar *phv = (PlaceHolderVar *) node;
/* See if the PlaceHolderVar has bubbled up from a lower plan node */
- if (context->outer_itlist->has_ph_vars)
+ if (context->outer_itlist && context->outer_itlist->has_ph_vars)
{
newvar = search_indexed_tlist_for_non_var((Node *) phv,
context->outer_itlist,
@@ -1943,7 +1977,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
if (IsA(node, Param))
return fix_param_node(context->root, (Param *) node);
/* Try matching more complex expressions too, if tlists have any */
- if (context->outer_itlist->has_non_vars)
+ if (context->outer_itlist && context->outer_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var(node,
context->outer_itlist,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index acfd0bcfbe..0220672fc4 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2340,6 +2340,10 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
locally_added_param);
finalize_primnode((Node *) mtplan->returningLists,
&context);
+ finalize_primnode((Node *) mtplan->onConflictSet,
+ &context);
+ finalize_primnode((Node *) mtplan->onConflictWhere,
+ &context);
foreach(l, mtplan->plans)
{
context.paramids =
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 50acfe40e9..4f0dc80d02 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1030,6 +1030,9 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
pullup_replace_vars((Node *) parse->targetList, &rvcontext);
parse->returningList = (List *)
pullup_replace_vars((Node *) parse->returningList, &rvcontext);
+ if (parse->onConflict)
+ parse->onConflict->onConflictSet = (List *)
+ pullup_replace_vars((Node *) parse->onConflict->onConflictSet, &rvcontext);
replace_vars_in_jointree((Node *) parse->jointree, &rvcontext,
lowest_nulling_outer_join);
Assert(parse->setOperations == NULL);
@@ -1605,6 +1608,9 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
pullup_replace_vars((Node *) parse->targetList, &rvcontext);
parse->returningList = (List *)
pullup_replace_vars((Node *) parse->returningList, &rvcontext);
+ if (parse->onConflict)
+ parse->onConflict->onConflictSet = (List *)
+ pullup_replace_vars((Node *) parse->onConflict->onConflictSet, &rvcontext);
replace_vars_in_jointree((Node *) parse->jointree, &rvcontext, NULL);
Assert(parse->setOperations == NULL);
parse->havingQual = pullup_replace_vars(parse->havingQual, &rvcontext);
diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c
index 580c846770..6b0c689e0c 100644
--- a/src/backend/optimizer/prep/preptlist.c
+++ b/src/backend/optimizer/prep/preptlist.c
@@ -181,6 +181,19 @@ preprocess_targetlist(PlannerInfo *root, List *tlist)
return tlist;
}
+/*
+ * preprocess_onconflict_targetlist
+ * Process ON CONFLICT SET targetlist.
+ *
+ * Returns the new targetlist.
+ */
+List *
+preprocess_onconflict_targetlist(List *tlist, int result_relation, List *range_table)
+{
+ return expand_targetlist(tlist, CMD_UPDATE, result_relation, range_table);
+}
+
+
/*****************************************************************************
*
* TARGETLIST EXPANSION
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 068ab39dd4..8bcc5064a3 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -25,6 +25,7 @@
#include "access/transam.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
+#include "catalog/dependency.h"
#include "catalog/heap.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
@@ -50,6 +51,8 @@ int constraint_exclusion = CONSTRAINT_EXCLUSION_PARTITION;
get_relation_info_hook_type get_relation_info_hook = NULL;
+static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
+ Bitmapset *inferAttrs, List *idxExprs);
static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
static List *get_relation_constraints(PlannerInfo *root,
Oid relationObjectId, RelOptInfo *rel,
@@ -399,6 +402,355 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
(*get_relation_info_hook) (root, relationObjectId, inhparent, rel);
}
+/*
+ * infer_arbiter_indexes -
+ * Determine the unique indexes used to arbitrate speculative insertion.
+ *
+ * Uses user-supplied inference clause expressions and predicate to match a
+ * unique index from those defined and ready on the heap relation (target).
+ * An exact match is required on columns/expressions (although they can appear
+ * in any order). However, the predicate given by the user need only restrict
+ * insertion to a subset of some part of the table covered by some particular
+ * unique index (in particular, a partial unique index) in order to be
+ * inferred.
+ *
+ * The implementation does not consider which B-Tree operator class any
+ * particular available unique index attribute uses, unless one was specified
+ * in the inference specification. The same is true of collations. In
+ * particular, there is no system dependency on the default operator class for
+ * the purposes of inference. If no opclass (or collation) is specified, then
+ * all matching indexes (that may or may not match the default in terms of
+ * each attribute opclass/collation) are used for inference.
+ */
+List *
+infer_arbiter_indexes(PlannerInfo *root)
+{
+ OnConflictExpr *onconflict = root->parse->onConflict;
+ /* Iteration state */
+ Relation relation;
+ Oid relationObjectId;
+ Oid indexOidFromConstraint = InvalidOid;
+ List *indexList;
+ ListCell *l;
+
+ /* Normalized inference attributes and inference expressions: */
+ Bitmapset *inferAttrs = NULL;
+ List *inferElems = NIL;
+
+ /* Result */
+ List *candidates = NIL;
+
+ /*
+ * Quickly return NIL for ON CONFLICT DO NOTHING without an inference
+ * specification or named constraint. ON CONFLICT DO UPDATE statements
+ * must always provide one or the other (but parser ought to have caught
+ * that already).
+ */
+ if (onconflict->arbiterElems == NIL &&
+ onconflict->constraint == InvalidOid)
+ return NIL;
+
+ /*
+ * We need not lock the relation since it was already locked, either by
+ * the rewriter or when expand_inherited_rtentry() added it to the query's
+ * rangetable.
+ */
+ relationObjectId = rt_fetch(root->parse->resultRelation,
+ root->parse->rtable)->relid;
+
+ relation = heap_open(relationObjectId, NoLock);
+
+ /*
+ * Build normalized/BMS representation of plain indexed attributes, as
+ * well as direct list of inference elements. This is required for
+ * matching the cataloged definition of indexes.
+ */
+ foreach(l, onconflict->arbiterElems)
+ {
+ InferenceElem *elem;
+ Var *var;
+ int attno;
+
+ elem = (InferenceElem *) lfirst(l);
+
+ /*
+ * Parse analysis of inference elements performs full parse analysis
+ * of Vars, even for non-expression indexes (in contrast with utility
+ * command related use of IndexElem). However, indexes are cataloged
+ * with simple attribute numbers for non-expression indexes. Those
+ * are handled later.
+ */
+ if (!IsA(elem->expr, Var))
+ {
+ inferElems = lappend(inferElems, elem->expr);
+ continue;
+ }
+
+ var = (Var *) elem->expr;
+ attno = var->varattno;
+
+ if (attno < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("system columns cannot be used in an ON CONFLICT clause")));
+ else if (attno == 0)
+ elog(ERROR, "whole row unique index inference specifications are not valid");
+
+ inferAttrs = bms_add_member(inferAttrs, attno);
+ }
+
+ /*
+ * Lookup named constraint's index. This is not immediately returned
+ * because some additional sanity checks are required.
+ */
+ if (onconflict->constraint != InvalidOid)
+ {
+ indexOidFromConstraint = get_constraint_index(onconflict->constraint);
+
+ if (indexOidFromConstraint == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("constraint in ON CONFLICT clause has no associated index")));
+ }
+
+ indexList = RelationGetIndexList(relation);
+
+ /*
+ * Using that representation, iterate through the list of indexes on the
+ * target relation to try and find a match
+ */
+ foreach(l, indexList)
+ {
+ Oid indexoid = lfirst_oid(l);
+ Relation idxRel;
+ Form_pg_index idxForm;
+ Bitmapset *indexedAttrs = NULL;
+ List *idxExprs;
+ List *predExprs;
+ List *whereExplicit;
+ AttrNumber natt;
+ ListCell *el;
+
+ /*
+ * Extract info from the relation descriptor for the index. We know
+ * that this is a target, so get lock type it is known will ultimately
+ * be required by the executor.
+ *
+ * Let executor complain about !indimmediate case directly, because
+ * enforcement needs to occur there anyway when an inference clause is
+ * omitted.
+ */
+ idxRel = index_open(indexoid, RowExclusiveLock);
+ idxForm = idxRel->rd_index;
+
+ if (!IndexIsValid(idxForm))
+ goto next;
+
+ /*
+ * If the index is valid, but cannot yet be used, ignore it. See
+ * src/backend/access/heap/README.HOT for discussion.
+ */
+ if (idxForm->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(idxRel->rd_indextuple->t_data),
+ TransactionXmin))
+ goto next;
+
+ /*
+ * Look for match on "ON constraint_name" variant, which may not be
+ * unique constraint. This can only be a constraint name.
+ */
+ if (indexOidFromConstraint == idxForm->indexrelid)
+ {
+ if (!idxForm->indisunique && onconflict->action == ONCONFLICT_UPDATE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("ON CONFLICT DO UPDATE not supported with exclusion constraints")));
+
+ list_free(indexList);
+ index_close(idxRel, NoLock);
+ heap_close(relation, NoLock);
+ candidates = lappend_oid(candidates, idxForm->indexrelid);
+ return candidates;
+ }
+ else if (indexOidFromConstraint != InvalidOid)
+ {
+ /* No point in further work for index in named constraint case */
+ goto next;
+ }
+
+ /*
+ * Only considering conventional inference at this point (not named
+ * constraints), so index under consideration can be immediately
+ * skipped if it's not unique
+ */
+ if (!idxForm->indisunique)
+ goto next;
+
+ /* Build BMS representation of cataloged index attributes */
+ for (natt = 0; natt < idxForm->indnatts; natt++)
+ {
+ int attno = idxRel->rd_index->indkey.values[natt];
+
+ if (attno < 0)
+ elog(ERROR, "system column in index");
+
+ if (attno != 0)
+ indexedAttrs = bms_add_member(indexedAttrs, attno);
+ }
+
+ /* Non-expression attributes (if any) must match */
+ if (!bms_equal(indexedAttrs, inferAttrs))
+ goto next;
+
+ /* Expression attributes (if any) must match */
+ idxExprs = RelationGetIndexExpressions(idxRel);
+ foreach(el, onconflict->arbiterElems)
+ {
+ InferenceElem *elem = (InferenceElem *) lfirst(el);
+
+ /*
+ * Ensure that collation/opclass aspects of inference expression
+ * element match. Even though this loop is primarily concerned
+ * with matching expressions, it is a convenient point to check
+ * this for both expressions and ordinary (non-expression)
+ * attributes appearing as inference elements.
+ */
+ if (!infer_collation_opclass_match(elem, idxRel, inferAttrs,
+ idxExprs))
+ goto next;
+
+ /*
+ * Plain Vars don't factor into count of expression elements, and
+ * the question of whether or not they satisfy the index
+ * definition has already been considered (they must).
+ */
+ if (IsA(elem->expr, Var))
+ continue;
+
+ /*
+ * Might as well avoid redundant check in the rare cases where
+ * infer_collation_opclass_match() is required to do real work.
+ * Otherwise, check that element expression appears in cataloged
+ * index definition.
+ */
+ if (elem->infercollid != InvalidOid ||
+ elem->inferopfamily != InvalidOid ||
+ list_member(idxExprs, elem->expr))
+ continue;
+
+ goto next;
+ }
+
+ /*
+ * Now that all inference elements were matched, ensure that the
+ * expression elements from inference clause are not missing any
+ * cataloged expressions. This does the right thing when unique
+ * indexes redundantly repeat the same attribute, or if attributes
+ * redundantly appear multiple times within an inference clause.
+ */
+ if (list_difference(idxExprs, inferElems) != NIL)
+ goto next;
+
+ /*
+ * Any user-supplied ON CONFLICT unique index inference WHERE clause
+ * need only be implied by the cataloged index definitions predicate.
+ */
+ predExprs = RelationGetIndexPredicate(idxRel);
+ whereExplicit = make_ands_implicit((Expr *) onconflict->arbiterWhere);
+
+ if (!predicate_implied_by(predExprs, whereExplicit))
+ goto next;
+
+ candidates = lappend_oid(candidates, idxForm->indexrelid);
+next:
+ index_close(idxRel, NoLock);
+ }
+
+ list_free(indexList);
+ heap_close(relation, NoLock);
+
+ if (candidates == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("there is no unique or exclusion constraint matching the ON CONFLICT specification")));
+
+ return candidates;
+}
+
+/*
+ * infer_collation_opclass_match - ensure infer element opclass/collation match
+ *
+ * Given unique index inference element from inference specification, if
+ * collation was specified, or if opclass (represented here as opfamily +
+ * opcintype) was specified, verify that there is at least one matching
+ * indexed attribute (occasionally, there may be more). Skip this in the
+ * common case where inference specification does not include collation or
+ * opclass (instead matching everything, regardless of cataloged
+ * collation/opclass of indexed attribute).
+ *
+ * At least historically, Postgres has not offered collations or opclasses
+ * with alternative-to-default notions of equality, so these additional
+ * criteria should only be required infrequently.
+ *
+ * Don't give up immediately when an inference element matches some attribute
+ * cataloged as indexed but not matching additional opclass/collation
+ * criteria. This is done so that the implementation is as forgiving as
+ * possible of redundancy within cataloged index attributes (or, less
+ * usefully, within inference specification elements). If collations actually
+ * differ between apparently redundantly indexed attributes (redundant within
+ * or across indexes), then there really is no redundancy as such.
+ *
+ * Note that if an inference element specifies an opclass and a collation at
+ * once, both must match in at least one particular attribute within index
+ * catalog definition in order for that inference element to be considered
+ * inferred/satisfied.
+ */
+static bool
+infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
+ Bitmapset *inferAttrs, List *idxExprs)
+{
+ AttrNumber natt;
+
+ /*
+ * If inference specification element lacks collation/opclass, then no
+ * need to check for exact match.
+ */
+ if (elem->infercollid == InvalidOid && elem->inferopfamily == InvalidOid)
+ return true;
+
+ for (natt = 1; natt <= idxRel->rd_att->natts; natt++)
+ {
+ Oid opfamily = idxRel->rd_opfamily[natt - 1];
+ Oid opcinputtype = idxRel->rd_opcintype[natt - 1];
+ Oid collation = idxRel->rd_indcollation[natt - 1];
+
+ if (elem->inferopfamily != InvalidOid &&
+ (elem->inferopfamily != opfamily ||
+ elem->inferopcinputtype != opcinputtype))
+ {
+ /* Attribute needed to match opclass, but didn't */
+ continue;
+ }
+
+ if (elem->infercollid != InvalidOid &&
+ elem->infercollid != collation)
+ {
+ /* Attribute needed to match collation, but didn't */
+ continue;
+ }
+
+ if ((IsA(elem->expr, Var) &&
+ bms_is_member(((Var *) elem->expr)->varattno, inferAttrs)) ||
+ list_member(idxExprs, elem->expr))
+ {
+ /* Found one match - good enough */
+ return true;
+ }
+ }
+
+ return false;
+}
+
/*
* estimate_rel_size - estimate # pages and # tuples in a table or index
*
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 2d320d100b..3eb4feabfd 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -52,6 +52,8 @@ static Query *transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt);
static Query *transformInsertStmt(ParseState *pstate, InsertStmt *stmt);
static List *transformInsertRow(ParseState *pstate, List *exprlist,
List *stmtcols, List *icolumns, List *attrnos);
+static OnConflictExpr *transformOnConflictClause(ParseState *pstate,
+ OnConflictClause *onConflictClause);
static int count_rowexpr_columns(ParseState *pstate, Node *expr);
static Query *transformSelectStmt(ParseState *pstate, SelectStmt *stmt);
static Query *transformValuesClause(ParseState *pstate, SelectStmt *stmt);
@@ -62,6 +64,8 @@ static void determineRecursiveColTypes(ParseState *pstate,
Node *larg, List *nrtargetlist);
static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt);
static List *transformReturningList(ParseState *pstate, List *returningList);
+static List *transformUpdateTargetList(ParseState *pstate,
+ List *targetList);
static Query *transformDeclareCursorStmt(ParseState *pstate,
DeclareCursorStmt *stmt);
static Query *transformExplainStmt(ParseState *pstate,
@@ -419,6 +423,8 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
ListCell *icols;
ListCell *attnos;
ListCell *lc;
+ bool isOnConflictUpdate;
+ AclMode targetPerms;
/* There can't be any outer WITH to worry about */
Assert(pstate->p_ctenamespace == NIL);
@@ -434,6 +440,9 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
qry->hasModifyingCTE = pstate->p_hasModifyingCTE;
}
+ isOnConflictUpdate = (stmt->onConflictClause &&
+ stmt->onConflictClause->action == ONCONFLICT_UPDATE);
+
/*
* We have three cases to deal with: DEFAULT VALUES (selectStmt == NULL),
* VALUES list, or general SELECT input. We special-case VALUES, both for
@@ -478,8 +487,11 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
* mentioned in the SELECT part. Note that the target table is not added
* to the joinlist or namespace.
*/
+ targetPerms = ACL_INSERT;
+ if (isOnConflictUpdate)
+ targetPerms |= ACL_UPDATE;
qry->resultRelation = setTargetTable(pstate, stmt->relation,
- false, false, ACL_INSERT);
+ false, false, targetPerms);
/* Validate stmt->cols list, or build default list if no list given */
icolumns = checkInsertTargets(pstate, stmt->cols, &attrnos);
@@ -740,6 +752,11 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt)
attnos = lnext(attnos);
}
+ /* Process ON CONFLICT, if any. */
+ if (stmt->onConflictClause)
+ qry->onConflict = transformOnConflictClause(pstate,
+ stmt->onConflictClause);
+
/*
* If we have a RETURNING clause, we need to add the target relation to
* the query namespace before processing it, so that Var references in
@@ -849,6 +866,85 @@ transformInsertRow(ParseState *pstate, List *exprlist,
return result;
}
+/*
+ * transformSelectStmt -
+ * transforms an OnConflictClause in an INSERT
+ */
+static OnConflictExpr *
+transformOnConflictClause(ParseState *pstate,
+ OnConflictClause *onConflictClause)
+{
+ List *arbiterElems;
+ Node *arbiterWhere;
+ Oid arbiterConstraint;
+ List *onConflictSet = NIL;
+ Node *onConflictWhere = NULL;
+ RangeTblEntry *exclRte = NULL;
+ int exclRelIndex = 0;
+ List *exclRelTlist = NIL;
+ OnConflictExpr *result;
+
+ /* Process the arbiter clause, ON CONFLICT ON (...) */
+ transformOnConflictArbiter(pstate, onConflictClause, &arbiterElems,
+ &arbiterWhere, &arbiterConstraint);
+
+ /* Process DO UPDATE */
+ if (onConflictClause->action == ONCONFLICT_UPDATE)
+ {
+ exclRte = addRangeTableEntryForRelation(pstate,
+ pstate->p_target_relation,
+ makeAlias("excluded", NIL),
+ false, false);
+ exclRelIndex = list_length(pstate->p_rtable);
+
+ /*
+ * Build a targetlist for the EXCLUDED pseudo relation. Out of
+ * simplicity we do that here, because expandRelAttrs() happens to
+ * nearly do the right thing; specifically it also works with views.
+ * It'd be more proper to instead scan some pseudo scan node, but it
+ * doesn't seem worth the amount of code required.
+ *
+ * The only caveat of this hack is that the permissions expandRelAttrs
+ * adds have to be reset. markVarForSelectPriv() will add the exact
+ * required permissions back.
+ */
+ exclRelTlist = expandRelAttrs(pstate, exclRte,
+ exclRelIndex, 0, -1);
+ exclRte->requiredPerms = 0;
+ exclRte->selectedCols = NULL;
+
+ /*
+ * Add EXCLUDED and the target RTE to the namespace, so that they can
+ * be used in the UPDATE statement.
+ */
+ addRTEtoQuery(pstate, exclRte, false, true, true);
+ addRTEtoQuery(pstate, pstate->p_target_rangetblentry,
+ false, true, true);
+
+ onConflictSet =
+ transformUpdateTargetList(pstate, onConflictClause->targetList);
+
+ onConflictWhere = transformWhereClause(pstate,
+ onConflictClause->whereClause,
+ EXPR_KIND_WHERE, "WHERE");
+ }
+
+ /* Finally, build ON CONFLICT DO [NOTHING | UPDATE] expression */
+ result = makeNode(OnConflictExpr);
+
+ result->action = onConflictClause->action;
+ result->arbiterElems = arbiterElems;
+ result->arbiterWhere = arbiterWhere;
+ result->constraint = arbiterConstraint;
+ result->onConflictSet = onConflictSet;
+ result->onConflictWhere = onConflictWhere;
+ result->exclRelIndex = exclRelIndex;
+ result->exclRelTlist = exclRelTlist;
+
+ return result;
+}
+
+
/*
* count_rowexpr_columns -
* get number of columns contained in a ROW() expression;
@@ -1899,10 +1995,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
{
Query *qry = makeNode(Query);
ParseNamespaceItem *nsitem;
- RangeTblEntry *target_rte;
Node *qual;
- ListCell *origTargetList;
- ListCell *tl;
qry->commandType = CMD_UPDATE;
pstate->p_is_update = true;
@@ -1937,23 +2030,41 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
nsitem->p_lateral_only = false;
nsitem->p_lateral_ok = true;
- qry->targetList = transformTargetList(pstate, stmt->targetList,
- EXPR_KIND_UPDATE_SOURCE);
-
qual = transformWhereClause(pstate, stmt->whereClause,
EXPR_KIND_WHERE, "WHERE");
qry->returningList = transformReturningList(pstate, stmt->returningList);
+ /*
+ * Now we are done with SELECT-like processing, and can get on with
+ * transforming the target list to match the UPDATE target columns.
+ */
+ qry->targetList = transformUpdateTargetList(pstate, stmt->targetList);
+
qry->rtable = pstate->p_rtable;
qry->jointree = makeFromExpr(pstate->p_joinlist, qual);
qry->hasSubLinks = pstate->p_hasSubLinks;
- /*
- * Now we are done with SELECT-like processing, and can get on with
- * transforming the target list to match the UPDATE target columns.
- */
+ assign_query_collations(pstate, qry);
+
+ return qry;
+}
+
+/*
+ * transformUpdateTargetList -
+ * handle SET clause in UPDATE/INSERT ... ON CONFLICT UPDATE
+ */
+static List *
+transformUpdateTargetList(ParseState *pstate, List *origTlist)
+{
+ List *tlist = NIL;
+ RangeTblEntry *target_rte;
+ ListCell *orig_tl;
+ ListCell *tl;
+
+ tlist = transformTargetList(pstate, origTlist,
+ EXPR_KIND_UPDATE_SOURCE);
/* Prepare to assign non-conflicting resnos to resjunk attributes */
if (pstate->p_next_resno <= pstate->p_target_relation->rd_rel->relnatts)
@@ -1961,9 +2072,9 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
/* Prepare non-junk columns for assignment to target table */
target_rte = pstate->p_target_rangetblentry;
- origTargetList = list_head(stmt->targetList);
+ orig_tl = list_head(origTlist);
- foreach(tl, qry->targetList)
+ foreach(tl, tlist)
{
TargetEntry *tle = (TargetEntry *) lfirst(tl);
ResTarget *origTarget;
@@ -1981,9 +2092,9 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
tle->resname = NULL;
continue;
}
- if (origTargetList == NULL)
+ if (orig_tl == NULL)
elog(ERROR, "UPDATE target count mismatch --- internal error");
- origTarget = (ResTarget *) lfirst(origTargetList);
+ origTarget = (ResTarget *) lfirst(orig_tl);
Assert(IsA(origTarget, ResTarget));
attrno = attnameAttNum(pstate->p_target_relation,
@@ -2005,14 +2116,12 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt)
target_rte->updatedCols = bms_add_member(target_rte->updatedCols,
attrno - FirstLowInvalidHeapAttributeNumber);
- origTargetList = lnext(origTargetList);
+ orig_tl = lnext(orig_tl);
}
- if (origTargetList != NULL)
+ if (orig_tl != NULL)
elog(ERROR, "UPDATE target count mismatch --- internal error");
- assign_query_collations(pstate, qry);
-
- return qry;
+ return tlist;
}
/*
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0180530a30..7a4c07365c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -217,6 +217,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
RangeVar *range;
IntoClause *into;
WithClause *with;
+ InferClause *infer;
+ OnConflictClause *onconflict;
A_Indices *aind;
ResTarget *target;
struct PrivTarget *privtarget;
@@ -318,7 +320,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
opt_class opt_inline_handler opt_validator validator_clause
opt_collate
-%type qualified_name OptConstrFromTable
+%type qualified_name insert_target OptConstrFromTable
%type all_Op MathOp
@@ -344,7 +346,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
OptTableElementList TableElementList OptInherit definition
OptTypedTableElementList TypedTableElementList
reloptions opt_reloptions
- OptWith opt_distinct opt_definition func_args func_args_list
+ OptWith distinct_clause opt_all_clause opt_definition func_args func_args_list
func_args_with_defaults func_args_with_defaults_list
aggr_args aggr_args_list
func_as createfunc_opt_list alterfunc_opt_list
@@ -389,7 +391,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type for_locking_item
%type for_locking_clause opt_for_locking_clause for_locking_items
%type locked_rels_list
-%type opt_all
+%type all_or_distinct
%type join_outer join_qual
%type join_type
@@ -418,6 +420,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type SeqOptElem
%type insert_rest
+%type opt_conf_expr
+%type opt_on_conflict
%type generic_set set_rest set_rest_more generic_reset reset_rest
SetResetClause FunctionSetResetClause
@@ -557,8 +561,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
CACHE CALLED CASCADE CASCADED CASE CAST CATALOG_P CHAIN CHAR_P
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLLATION COLUMN COMMENT COMMENTS COMMIT
- COMMITTED CONCURRENTLY CONFIGURATION CONNECTION CONSTRAINT CONSTRAINTS
- CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE
+ COMMITTED CONCURRENTLY CONFIGURATION CONFLICT CONNECTION CONSTRAINT
+ CONSTRAINTS CONTENT_P CONTINUE_P CONVERSION_P COPY COST CREATE
CROSS CSV CURRENT_P
CURRENT_CATALOG CURRENT_DATE CURRENT_ROLE CURRENT_SCHEMA
CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
@@ -9436,15 +9440,35 @@ DeallocateStmt: DEALLOCATE name
*****************************************************************************/
InsertStmt:
- opt_with_clause INSERT INTO qualified_name insert_rest returning_clause
+ opt_with_clause INSERT INTO insert_target insert_rest
+ opt_on_conflict returning_clause
{
$5->relation = $4;
- $5->returningList = $6;
+ $5->onConflictClause = $6;
+ $5->returningList = $7;
$5->withClause = $1;
$$ = (Node *) $5;
}
;
+/*
+ * Can't easily make AS optional here, because VALUES in insert_rest would
+ * have a shift/reduce conflict with a values as a optional alias. We could
+ * easily allow unreserved_keywords as optional aliases, but that'd be a odd
+ * divergance from other places. So just require AS for now.
+ */
+insert_target:
+ qualified_name
+ {
+ $$ = $1;
+ }
+ | qualified_name AS ColId
+ {
+ $1->alias = makeAlias($3, NIL);
+ $$ = $1;
+ }
+ ;
+
insert_rest:
SelectStmt
{
@@ -9484,6 +9508,56 @@ insert_column_item:
}
;
+opt_on_conflict:
+ ON CONFLICT opt_conf_expr DO UPDATE SET set_clause_list where_clause
+ {
+ $$ = makeNode(OnConflictClause);
+ $$->action = ONCONFLICT_UPDATE;
+ $$->infer = $3;
+ $$->targetList = $7;
+ $$->whereClause = $8;
+ $$->location = @1;
+ }
+ |
+ ON CONFLICT opt_conf_expr DO NOTHING
+ {
+ $$ = makeNode(OnConflictClause);
+ $$->action = ONCONFLICT_NOTHING;
+ $$->infer = $3;
+ $$->targetList = NIL;
+ $$->whereClause = NULL;
+ $$->location = @1;
+ }
+ | /*EMPTY*/
+ {
+ $$ = NULL;
+ }
+ ;
+
+opt_conf_expr:
+ '(' index_params ')' where_clause
+ {
+ $$ = makeNode(InferClause);
+ $$->indexElems = $2;
+ $$->whereClause = $4;
+ $$->conname = NULL;
+ $$->location = @1;
+ }
+ |
+ ON CONSTRAINT name
+ {
+ $$ = makeNode(InferClause);
+ $$->indexElems = NIL;
+ $$->whereClause = NULL;
+ $$->conname = $3;
+ $$->location = @1;
+ }
+ | /*EMPTY*/
+ {
+ $$ = NULL;
+ }
+ ;
+
returning_clause:
RETURNING target_list { $$ = $2; }
| /* EMPTY */ { $$ = NIL; }
@@ -9870,7 +9944,21 @@ select_clause:
* However, this is not checked by the grammar; parse analysis must check it.
*/
simple_select:
- SELECT opt_distinct opt_target_list
+ SELECT opt_all_clause opt_target_list
+ into_clause from_clause where_clause
+ group_clause having_clause window_clause
+ {
+ SelectStmt *n = makeNode(SelectStmt);
+ n->targetList = $3;
+ n->intoClause = $4;
+ n->fromClause = $5;
+ n->whereClause = $6;
+ n->groupClause = $7;
+ n->havingClause = $8;
+ n->windowClause = $9;
+ $$ = (Node *)n;
+ }
+ | SELECT distinct_clause target_list
into_clause from_clause where_clause
group_clause having_clause window_clause
{
@@ -9905,15 +9993,15 @@ simple_select:
n->fromClause = list_make1($2);
$$ = (Node *)n;
}
- | select_clause UNION opt_all select_clause
+ | select_clause UNION all_or_distinct select_clause
{
$$ = makeSetOp(SETOP_UNION, $3, $1, $4);
}
- | select_clause INTERSECT opt_all select_clause
+ | select_clause INTERSECT all_or_distinct select_clause
{
$$ = makeSetOp(SETOP_INTERSECT, $3, $1, $4);
}
- | select_clause EXCEPT opt_all select_clause
+ | select_clause EXCEPT all_or_distinct select_clause
{
$$ = makeSetOp(SETOP_EXCEPT, $3, $1, $4);
}
@@ -10052,7 +10140,8 @@ opt_table: TABLE {}
| /*EMPTY*/ {}
;
-opt_all: ALL { $$ = TRUE; }
+all_or_distinct:
+ ALL { $$ = TRUE; }
| DISTINCT { $$ = FALSE; }
| /*EMPTY*/ { $$ = FALSE; }
;
@@ -10060,10 +10149,13 @@ opt_all: ALL { $$ = TRUE; }
/* We use (NIL) as a placeholder to indicate that all target expressions
* should be placed in the DISTINCT list during parsetree analysis.
*/
-opt_distinct:
+distinct_clause:
DISTINCT { $$ = list_make1(NIL); }
| DISTINCT ON '(' expr_list ')' { $$ = $4; }
- | ALL { $$ = NIL; }
+ ;
+
+opt_all_clause:
+ ALL { $$ = NIL;}
| /*EMPTY*/ { $$ = NIL; }
;
@@ -13367,6 +13459,7 @@ unreserved_keyword:
| COMMIT
| COMMITTED
| CONFIGURATION
+ | CONFLICT
| CONNECTION
| CONSTRAINTS
| CONTENT_P
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 8d90b5098a..73c505ed85 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -16,7 +16,9 @@
#include "postgres.h"
#include "access/heapam.h"
+#include "catalog/catalog.h"
#include "catalog/heap.h"
+#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "nodes/makefuncs.h"
@@ -32,6 +34,7 @@
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
+#include "parser/parse_type.h"
#include "rewrite/rewriteManip.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
@@ -75,6 +78,8 @@ static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node,
List **tlist, ParseExprKind exprKind);
static int get_matching_location(int sortgroupref,
List *sortgrouprefs, List *exprs);
+static List *resolve_unique_index_expr(ParseState *pstate, InferClause * infer,
+ Relation heapRel);
static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle,
List *grouplist, List *targetlist, int location,
bool resolveUnknown);
@@ -2166,6 +2171,204 @@ get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs)
return -1; /* keep compiler quiet */
}
+/*
+ * resolve_unique_index_expr
+ * Infer a unique index from a list of indexElems, for ON
+ * CONFLICT clause
+ *
+ * Perform parse analysis of expressions and columns appearing within ON
+ * CONFLICT clause. During planning, the returned list of expressions is used
+ * to infer which unique index to use.
+ */
+static List *
+resolve_unique_index_expr(ParseState *pstate, InferClause *infer,
+ Relation heapRel)
+{
+ List *result = NIL;
+ ListCell *l;
+
+ foreach(l, infer->indexElems)
+ {
+ IndexElem *ielem = (IndexElem *) lfirst(l);
+ InferenceElem *pInfer = makeNode(InferenceElem);
+ Node *parse;
+
+ /*
+ * Raw grammar re-uses CREATE INDEX infrastructure for unique index
+ * inference clause, and so will accept opclasses by name and so on.
+ *
+ * Make no attempt to match ASC or DESC ordering or NULLS FIRST/NULLS
+ * LAST ordering, since those are not significant for inference
+ * purposes (any unique index matching the inference specification in
+ * other regards is accepted indifferently). Actively reject this as
+ * wrong-headed.
+ */
+ if (ielem->ordering != SORTBY_DEFAULT)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("ASC/DESC is not allowed in ON CONFLICT clause"),
+ parser_errposition(pstate,
+ exprLocation((Node *) infer))));
+ if (ielem->nulls_ordering != SORTBY_NULLS_DEFAULT)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("NULLS FIRST/LAST is not allowed in ON CONFLICT clause"),
+ parser_errposition(pstate,
+ exprLocation((Node *) infer))));
+
+ if (!ielem->expr)
+ {
+ /* Simple index attribute */
+ ColumnRef *n;
+
+ /*
+ * Grammar won't have built raw expression for us in event of
+ * plain column reference. Create one directly, and perform
+ * expression transformation. Planner expects this, and performs
+ * its own normalization for the purposes of matching against
+ * pg_index.
+ */
+ n = makeNode(ColumnRef);
+ n->fields = list_make1(makeString(ielem->name));
+ /* Location is approximately that of inference specification */
+ n->location = infer->location;
+ parse = (Node *) n;
+ }
+ else
+ {
+ /* Do parse transformation of the raw expression */
+ parse = (Node *) ielem->expr;
+ }
+
+ /*
+ * transformExpr() should have already rejected subqueries,
+ * aggregates, and window functions, based on the EXPR_KIND_ for an
+ * index expression. Expressions returning sets won't have been
+ * rejected, but don't bother doing so here; there should be no
+ * available expression unique index to match any such expression
+ * against anyway.
+ */
+ pInfer->expr = transformExpr(pstate, parse, EXPR_KIND_INDEX_EXPRESSION);
+
+ /* Perform lookup of collation and operator class as required */
+ if (!ielem->collation)
+ pInfer->infercollid = InvalidOid;
+ else
+ pInfer->infercollid = LookupCollation(pstate, ielem->collation,
+ exprLocation(pInfer->expr));
+
+ if (!ielem->opclass)
+ {
+ pInfer->inferopfamily = InvalidOid;
+ pInfer->inferopcinputtype = InvalidOid;
+ }
+ else
+ {
+ Oid opclass = get_opclass_oid(BTREE_AM_OID, ielem->opclass,
+ false);
+
+ pInfer->inferopfamily = get_opclass_family(opclass);
+ pInfer->inferopcinputtype = get_opclass_input_type(opclass);
+ }
+
+ result = lappend(result, pInfer);
+ }
+
+ return result;
+}
+
+/*
+ * transformOnConflictArbiter -
+ * transform arbiter expressions in an ON CONFLICT clause.
+ *
+ * Transformed expressions used to infer one unique index relation to serve as
+ * an ON CONFLICT arbiter. Partial unique indexes may be inferred using WHERE
+ * clause from inference specification clause.
+ */
+void
+transformOnConflictArbiter(ParseState *pstate,
+ OnConflictClause *onConflictClause,
+ List **arbiterExpr, Node **arbiterWhere,
+ Oid *constraint)
+{
+ InferClause *infer = onConflictClause->infer;
+
+ *arbiterExpr = NIL;
+ *arbiterWhere = NULL;
+ *constraint = InvalidOid;
+
+ if (onConflictClause->action == ONCONFLICT_UPDATE && !infer)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("ON CONFLICT DO UPDATE requires inference specification or constraint name"),
+ errhint("For example, ON CONFLICT ON CONFLICT ()."),
+ parser_errposition(pstate,
+ exprLocation((Node *) onConflictClause))));
+
+ /*
+ * To simplify certain aspects of its design, speculative insertion into
+ * system catalogs is disallowed
+ */
+ if (IsCatalogRelation(pstate->p_target_relation))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT not supported with system catalog tables"),
+ parser_errposition(pstate,
+ exprLocation((Node *) onConflictClause))));
+
+ /* Same applies to table used by logical decoding as catalog table */
+ if (RelationIsUsedAsCatalogTable(pstate->p_target_relation))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT not supported on table \"%s\" used as a catalog table",
+ RelationGetRelationName(pstate->p_target_relation)),
+ parser_errposition(pstate,
+ exprLocation((Node *) onConflictClause))));
+
+ /* ON CONFLICT DO NOTHING does not require an inference clause */
+ if (infer)
+ {
+ List *save_namespace;
+
+ /*
+ * While we process the arbiter expressions, accept only
+ * non-qualified references to the target table. Hide any other
+ * relations.
+ */
+ save_namespace = pstate->p_namespace;
+ pstate->p_namespace = NIL;
+ addRTEtoQuery(pstate, pstate->p_target_rangetblentry,
+ false, false, true);
+
+ if (infer->indexElems)
+ *arbiterExpr = resolve_unique_index_expr(pstate, infer,
+ pstate->p_target_relation);
+
+ /*
+ * Handling inference WHERE clause (for partial unique index
+ * inference)
+ */
+ if (infer->whereClause)
+ *arbiterWhere = transformExpr(pstate, infer->whereClause,
+ EXPR_KIND_INDEX_PREDICATE);
+
+ pstate->p_namespace = save_namespace;
+
+ if (infer->conname)
+ *constraint = get_relation_constraint_oid(RelationGetRelid(pstate->p_target_relation),
+ infer->conname, false);
+ }
+
+ /*
+ * It's convenient to form a list of expressions based on the
+ * representation used by CREATE INDEX, since the same restrictions are
+ * appropriate (e.g. on subqueries). However, from here on, a dedicated
+ * primnode representation is used for inference elements, and so
+ * assign_query_collations() can be trusted to do the right thing with the
+ * post parse analysis query tree inference clause representation.
+ */
+}
+
/*
* addTargetToSortList
* If the given targetlist entry isn't already in the SortGroupClause
diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c
index 7c6a11c757..4c85b708d3 100644
--- a/src/backend/parser/parse_collate.c
+++ b/src/backend/parser/parse_collate.c
@@ -479,9 +479,11 @@ assign_collations_walker(Node *node, assign_collations_context *context)
parser_errposition(context->pstate,
loccontext.location2)));
break;
+ case T_InferenceElem:
case T_RangeTblRef:
case T_JoinExpr:
case T_FromExpr:
+ case T_OnConflictExpr:
case T_SortGroupClause:
(void) expression_tree_walker(node,
assign_collations_walker,
diff --git a/src/backend/parser/parse_target.c b/src/backend/parser/parse_target.c
index 2d85cf08e7..59973ba9c3 100644
--- a/src/backend/parser/parse_target.c
+++ b/src/backend/parser/parse_target.c
@@ -537,11 +537,12 @@ transformAssignedExpr(ParseState *pstate,
/*
* updateTargetListEntry()
- * This is used in UPDATE statements only. It prepares an UPDATE
- * TargetEntry for assignment to a column of the target table.
- * This includes coercing the given value to the target column's type
- * (if necessary), and dealing with any subfield names or subscripts
- * attached to the target column itself.
+ * This is used in UPDATE statements (and ON CONFLICT DO UPDATE)
+ * only. It prepares an UPDATE TargetEntry for assignment to a
+ * column of the target table. This includes coercing the given
+ * value to the target column's type (if necessary), and dealing with
+ * any subfield names or subscripts attached to the target column
+ * itself.
*
* pstate parse state
* tle target list entry to be modified
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 88424964ef..ea38818269 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -64,6 +64,8 @@ static void DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
static void DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+
static void DecodeCommit(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
xl_xact_parsed_commit *parsed, TransactionId xid);
static void DecodeAbort(LogicalDecodingContext *ctx, XLogRecordBuffer *buf,
@@ -414,6 +416,11 @@ DecodeHeapOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
break;
+ case XLOG_HEAP_CONFIRM:
+ if (SnapBuildProcessChange(builder, xid, buf->origptr))
+ DecodeSpecConfirm(ctx, buf);
+ break;
+
case XLOG_HEAP_LOCK:
/* we don't care about row level locks for now */
break;
@@ -564,11 +571,15 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
return;
change = ReorderBufferGetChange(ctx->reorder);
- change->action = REORDER_BUFFER_CHANGE_INSERT;
+ if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))
+ change->action = REORDER_BUFFER_CHANGE_INSERT;
+ else
+ change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;
change->origin_id = XLogRecGetOrigin(r);
+
memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
- if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE)
{
Size tuplelen;
char *tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
@@ -615,7 +626,7 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
change->origin_id = XLogRecGetOrigin(r);
memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
- if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)
{
data = XLogRecGetBlockData(r, 0, &datalen);
@@ -624,7 +635,7 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
}
- if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
+ if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)
{
/* caution, remaining data in record is not aligned */
data = XLogRecGetData(r) + SizeOfHeapUpdate;
@@ -660,6 +671,13 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
if (target_node.dbNode != ctx->slot->data.database)
return;
+ /*
+ * Super deletions are irrelevant for logical decoding, it's driven by the
+ * confirmation records.
+ */
+ if (xlrec->flags & XLH_DELETE_IS_SUPER)
+ return;
+
/* output plugin doesn't look for this origin, no need to queue */
if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
return;
@@ -671,7 +689,7 @@ DecodeDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
/* old primary key stored */
- if (xlrec->flags & XLOG_HEAP_CONTAINS_OLD)
+ if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
{
Assert(XLogRecGetDataLen(r) > (SizeOfHeapDelete + SizeOfHeapHeader));
@@ -737,7 +755,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
* We decode the tuple in pretty much the same way as DecodeXLogTuple,
* but since the layout is slightly different, we can't use it here.
*/
- if (xlrec->flags & XLOG_HEAP_CONTAINS_NEW_TUPLE)
+ if (xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE)
{
change->data.tp.newtuple = ReorderBufferGetTupleBuf(ctx->reorder);
@@ -775,7 +793,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
* xl_multi_insert_tuple record emitted by one heap_multi_insert()
* call.
*/
- if (xlrec->flags & XLOG_HEAP_LAST_MULTI_INSERT &&
+ if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&
(i + 1) == xlrec->ntuples)
change->data.tp.clear_toast_afterwards = true;
else
@@ -787,6 +805,40 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
Assert(data == tupledata + tuplelen);
}
+/*
+ * Parse XLOG_HEAP_CONFIRM from wal into a confirmation change.
+ *
+ * This is pretty trivial, all the state essentially already setup by the
+ * speculative insertion.
+ */
+static void
+DecodeSpecConfirm(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+ XLogReaderState *r = buf->record;
+ ReorderBufferChange *change;
+ RelFileNode target_node;
+
+ /* only interested in our database */
+ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL);
+ if (target_node.dbNode != ctx->slot->data.database)
+ return;
+
+ /* output plugin doesn't look for this origin, no need to queue */
+ if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
+ return;
+
+ change = ReorderBufferGetChange(ctx->reorder);
+ change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM;
+ change->origin_id = XLogRecGetOrigin(r);
+
+ memcpy(&change->data.tp.relnode, &target_node, sizeof(RelFileNode));
+
+ change->data.tp.clear_toast_afterwards = true;
+
+ ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr, change);
+}
+
+
/*
* Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
* (but not by heap_multi_insert) into a tuplebuf.
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index c9c1d1036e..57854b0aa5 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -401,6 +401,7 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change)
case REORDER_BUFFER_CHANGE_INSERT:
case REORDER_BUFFER_CHANGE_UPDATE:
case REORDER_BUFFER_CHANGE_DELETE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
if (change->data.tp.newtuple)
{
ReorderBufferReturnTupleBuf(rb, change->data.tp.newtuple);
@@ -420,8 +421,9 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change)
change->data.snapshot = NULL;
}
break;
+ /* no data in addition to the struct itself */
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
- break;
case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
break;
}
@@ -1317,6 +1319,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
PG_TRY();
{
ReorderBufferChange *change;
+ ReorderBufferChange *specinsert = NULL;
if (using_subtxn)
BeginInternalSubTransaction("replay");
@@ -1333,6 +1336,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
switch (change->action)
{
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
+ /*
+ * Confirmation for speculative insertion arrived. Simply
+ * use as a normal record. It'll be cleaned up at the end
+ * of INSERT processing.
+ */
+ Assert(specinsert->data.tp.oldtuple == NULL);
+ change = specinsert;
+ change->action = REORDER_BUFFER_CHANGE_INSERT;
+
+ /* intentionally fall through */
case REORDER_BUFFER_CHANGE_INSERT:
case REORDER_BUFFER_CHANGE_UPDATE:
case REORDER_BUFFER_CHANGE_DELETE:
@@ -1348,7 +1362,7 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
if (reloid == InvalidOid &&
change->data.tp.newtuple == NULL &&
change->data.tp.oldtuple == NULL)
- continue;
+ goto change_done;
else if (reloid == InvalidOid)
elog(ERROR, "could not map filenode \"%s\" to relation OID",
relpathperm(change->data.tp.relnode,
@@ -1362,50 +1376,92 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
relpathperm(change->data.tp.relnode,
MAIN_FORKNUM));
- if (RelationIsLogicallyLogged(relation))
+ if (!RelationIsLogicallyLogged(relation))
+ goto change_done;
+
+ /*
+ * For now ignore sequence changes entirely. Most of
+ * the time they don't log changes using records we
+ * understand, so it doesn't make sense to handle the
+ * few cases we do.
+ */
+ if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
+ goto change_done;
+
+ /* user-triggered change */
+ if (!IsToastRelation(relation))
{
+ ReorderBufferToastReplace(rb, txn, relation, change);
+ rb->apply_change(rb, txn, relation, change);
+
/*
- * For now ignore sequence changes entirely. Most of
- * the time they don't log changes using records we
- * understand, so it doesn't make sense to handle the
- * few cases we do.
+ * Only clear reassembled toast chunks if we're
+ * sure they're not required anymore. The creator
+ * of the tuple tells us.
*/
- if (relation->rd_rel->relkind == RELKIND_SEQUENCE)
- {
- }
- /* user-triggered change */
- else if (!IsToastRelation(relation))
- {
- ReorderBufferToastReplace(rb, txn, relation, change);
- rb->apply_change(rb, txn, relation, change);
-
- /*
- * Only clear reassembled toast chunks if we're
- * sure they're not required anymore. The creator
- * of the tuple tells us.
- */
- if (change->data.tp.clear_toast_afterwards)
- ReorderBufferToastReset(rb, txn);
- }
- /* we're not interested in toast deletions */
- else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
- {
- /*
- * Need to reassemble the full toasted Datum in
- * memory, to ensure the chunks don't get reused
- * till we're done remove it from the list of this
- * transaction's changes. Otherwise it will get
- * freed/reused while restoring spooled data from
- * disk.
- */
- dlist_delete(&change->node);
- ReorderBufferToastAppendChunk(rb, txn, relation,
- change);
- }
+ if (change->data.tp.clear_toast_afterwards)
+ ReorderBufferToastReset(rb, txn);
+ }
+ /* we're not interested in toast deletions */
+ else if (change->action == REORDER_BUFFER_CHANGE_INSERT)
+ {
+ /*
+ * Need to reassemble the full toasted Datum in
+ * memory, to ensure the chunks don't get reused till
+ * we're done remove it from the list of this
+ * transaction's changes. Otherwise it will get
+ * freed/reused while restoring spooled data from
+ * disk.
+ */
+ dlist_delete(&change->node);
+ ReorderBufferToastAppendChunk(rb, txn, relation,
+ change);
+ }
+
+ change_done:
+ /*
+ * Either speculative insertion was confirmed, or it was
+ * unsuccessful and the record isn't needed anymore.
+ */
+ if (specinsert != NULL)
+ {
+ ReorderBufferReturnChange(rb, specinsert);
+ specinsert = NULL;
+ }
+ if (relation != NULL)
+ {
+ RelationClose(relation);
+ relation = NULL;
}
- RelationClose(relation);
break;
+
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
+ /*
+ * Speculative insertions are dealt with by delaying the
+ * processing of the insert until the confirmation record
+ * arrives. For that we simply unlink the record from the
+ * chain, so it does not get freed/reused while restoring
+ * spooled data from disk.
+ *
+ * This is safe in the face of concurrent catalog changes
+ * because the relevant relation can't be changed between
+ * speculative insertion and confirmation due to
+ * CheckTableNotInUse() and locking.
+ */
+
+ /* clear out a pending (and thus failed) speculation */
+ if (specinsert != NULL)
+ {
+ ReorderBufferReturnChange(rb, specinsert);
+ specinsert = NULL;
+ }
+
+ /* and memorize the pending insertion */
+ dlist_delete(&change->node);
+ specinsert = change;
+ break;
+
case REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT:
/* get rid of the old */
TeardownHistoricSnapshot(false);
@@ -1474,6 +1530,17 @@ ReorderBufferCommit(ReorderBuffer *rb, TransactionId xid,
}
}
+ /*
+ * There's a a speculative insertion remaining, just clean in up, it
+ * can't have been successful, otherwise we'd gotten a confirmation
+ * record.
+ */
+ if (specinsert)
+ {
+ ReorderBufferReturnChange(rb, specinsert);
+ specinsert = NULL;
+ }
+
/* clean up the iterator */
ReorderBufferIterTXNFinish(rb, iterstate);
iterstate = NULL;
@@ -2001,11 +2068,11 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
switch (change->action)
{
+ /* fall through these, they're all similar enough */
case REORDER_BUFFER_CHANGE_INSERT:
- /* fall through */
case REORDER_BUFFER_CHANGE_UPDATE:
- /* fall through */
case REORDER_BUFFER_CHANGE_DELETE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
{
char *data;
ReorderBufferTupleBuf *oldtup,
@@ -2083,9 +2150,8 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
}
break;
}
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
- /* ReorderBufferChange contains everything important */
- break;
case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
/* ReorderBufferChange contains everything important */
break;
@@ -2256,11 +2322,11 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
/* restore individual stuff */
switch (change->action)
{
+ /* fall through these, they're all similar enough */
case REORDER_BUFFER_CHANGE_INSERT:
- /* fall through */
case REORDER_BUFFER_CHANGE_UPDATE:
- /* fall through */
case REORDER_BUFFER_CHANGE_DELETE:
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT:
if (change->data.tp.newtuple)
{
Size len = offsetof(ReorderBufferTupleBuf, t_data) +
@@ -2309,6 +2375,7 @@ ReorderBufferRestoreChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
break;
}
/* the base struct contains all the data, easy peasy */
+ case REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM:
case REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID:
case REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID:
break;
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 0fc47cb786..39302a410b 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -52,7 +52,10 @@ static Query *rewriteRuleAction(Query *parsetree,
CmdType event,
bool *returning_flag);
static List *adjustJoinTreeList(Query *parsetree, bool removert, int rt_index);
-static void rewriteTargetListIU(Query *parsetree, Relation target_relation,
+static List *rewriteTargetListIU(List *targetList,
+ CmdType commandType,
+ Relation target_relation,
+ int result_rti,
List **attrno_list);
static TargetEntry *process_matched_tle(TargetEntry *src_tle,
TargetEntry *prior_tle,
@@ -66,7 +69,7 @@ static void markQueryForLocking(Query *qry, Node *jtnode,
LockClauseStrength strength, LockWaitPolicy waitPolicy,
bool pushedDown);
static List *matchLocks(CmdType event, RuleLock *rulelocks,
- int varno, Query *parsetree);
+ int varno, Query *parsetree, bool *hasUpdate);
static Query *fireRIRrules(Query *parsetree, List *activeRIRs,
bool forUpdatePushedDown);
static bool view_has_instead_trigger(Relation view, CmdType event);
@@ -679,11 +682,13 @@ adjustJoinTreeList(Query *parsetree, bool removert, int rt_index)
* order of the original tlist's non-junk entries. This is needed for
* processing VALUES RTEs.
*/
-static void
-rewriteTargetListIU(Query *parsetree, Relation target_relation,
+static List*
+rewriteTargetListIU(List *targetList,
+ CmdType commandType,
+ Relation target_relation,
+ int result_rti,
List **attrno_list)
{
- CmdType commandType = parsetree->commandType;
TargetEntry **new_tles;
List *new_tlist = NIL;
List *junk_tlist = NIL;
@@ -709,7 +714,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation,
new_tles = (TargetEntry **) palloc0(numattrs * sizeof(TargetEntry *));
next_junk_attrno = numattrs + 1;
- foreach(temp, parsetree->targetList)
+ foreach(temp, targetList)
{
TargetEntry *old_tle = (TargetEntry *) lfirst(temp);
@@ -827,7 +832,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation,
{
Node *new_expr;
- new_expr = (Node *) makeVar(parsetree->resultRelation,
+ new_expr = (Node *) makeVar(result_rti,
attrno,
att_tup->atttypid,
att_tup->atttypmod,
@@ -846,7 +851,7 @@ rewriteTargetListIU(Query *parsetree, Relation target_relation,
pfree(new_tles);
- parsetree->targetList = list_concat(new_tlist, junk_tlist);
+ return list_concat(new_tlist, junk_tlist);
}
@@ -1288,7 +1293,8 @@ static List *
matchLocks(CmdType event,
RuleLock *rulelocks,
int varno,
- Query *parsetree)
+ Query *parsetree,
+ bool *hasUpdate)
{
List *matching_locks = NIL;
int nlocks;
@@ -1309,6 +1315,9 @@ matchLocks(CmdType event,
{
RewriteRule *oneLock = rulelocks->rules[i];
+ if (oneLock->event == CMD_UPDATE)
+ *hasUpdate = true;
+
/*
* Suppress ON INSERT/UPDATE/DELETE rules that are disabled or
* configured to not fire during the current sessions replication
@@ -1766,8 +1775,8 @@ fireRIRrules(Query *parsetree, List *activeRIRs, bool forUpdatePushedDown)
/*
* Fetch any new security quals that must be applied to this RTE.
*/
- get_row_security_policies(parsetree, rte, rt_index,
- &securityQuals, &withCheckOptions,
+ get_row_security_policies(parsetree, parsetree->commandType, rte,
+ rt_index, &securityQuals, &withCheckOptions,
&hasRowSecurity, &hasSubLinks);
if (securityQuals != NIL || withCheckOptions != NIL)
@@ -2642,6 +2651,18 @@ rewriteTargetView(Query *parsetree, Relation view)
tle->resno - FirstLowInvalidHeapAttributeNumber);
}
+ if (parsetree->onConflict)
+ {
+ foreach(lc, parsetree->onConflict->onConflictSet)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ if (!tle->resjunk)
+ modified_cols = bms_add_member(modified_cols,
+ tle->resno - FirstLowInvalidHeapAttributeNumber);
+ }
+ }
+
auto_update_detail = view_cols_are_auto_updatable(viewquery,
modified_cols,
NULL,
@@ -2999,6 +3020,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
CmdType event = parsetree->commandType;
bool instead = false;
bool returning = false;
+ bool updatableview = false;
Query *qual_product = NULL;
List *rewritten = NIL;
ListCell *lc1;
@@ -3081,6 +3103,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
Relation rt_entry_relation;
List *locks;
List *product_queries;
+ bool hasUpdate = false;
result_relation = parsetree->resultRelation;
Assert(result_relation != 0);
@@ -3123,19 +3146,41 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
List *attrnos;
/* Process the main targetlist ... */
- rewriteTargetListIU(parsetree, rt_entry_relation, &attrnos);
+ parsetree->targetList = rewriteTargetListIU(parsetree->targetList,
+ parsetree->commandType,
+ rt_entry_relation,
+ parsetree->resultRelation,
+ &attrnos);
/* ... and the VALUES expression lists */
rewriteValuesRTE(values_rte, rt_entry_relation, attrnos);
}
else
{
/* Process just the main targetlist */
- rewriteTargetListIU(parsetree, rt_entry_relation, NULL);
+ parsetree->targetList =
+ rewriteTargetListIU(parsetree->targetList,
+ parsetree->commandType,
+ rt_entry_relation,
+ parsetree->resultRelation, NULL);
+ }
+
+ if (parsetree->onConflict &&
+ parsetree->onConflict->action == ONCONFLICT_UPDATE)
+ {
+ parsetree->onConflict->onConflictSet =
+ rewriteTargetListIU(parsetree->onConflict->onConflictSet,
+ CMD_UPDATE,
+ rt_entry_relation,
+ parsetree->resultRelation,
+ NULL);
}
}
else if (event == CMD_UPDATE)
{
- rewriteTargetListIU(parsetree, rt_entry_relation, NULL);
+ parsetree->targetList =
+ rewriteTargetListIU(parsetree->targetList,
+ parsetree->commandType, rt_entry_relation,
+ parsetree->resultRelation, NULL);
rewriteTargetListUD(parsetree, rt_entry, rt_entry_relation);
}
else if (event == CMD_DELETE)
@@ -3149,7 +3194,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
* Collect and apply the appropriate rules.
*/
locks = matchLocks(event, rt_entry_relation->rd_rules,
- result_relation, parsetree);
+ result_relation, parsetree, &hasUpdate);
product_queries = fireRules(parsetree,
result_relation,
@@ -3198,6 +3243,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
*/
instead = true;
returning = true;
+ updatableview = true;
}
/*
@@ -3278,6 +3324,17 @@ RewriteQuery(Query *parsetree, List *rewrite_events)
}
}
+ /*
+ * Updatable views are supported by ON CONFLICT, so don't prevent that
+ * case from proceeding
+ */
+ if (parsetree->onConflict &&
+ (product_queries != NIL || hasUpdate) &&
+ !updatableview)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("INSERT with ON CONFLICT clause cannot be used with table that has INSERT or UPDATE rules")));
+
heap_close(rt_entry_relation, NoLock);
}
diff --git a/src/backend/rewrite/rowsecurity.c b/src/backend/rewrite/rowsecurity.c
index b0b308118f..2c095ce88a 100644
--- a/src/backend/rewrite/rowsecurity.c
+++ b/src/backend/rewrite/rowsecurity.c
@@ -89,9 +89,10 @@ row_security_policy_hook_type row_security_policy_hook_restrictive = NULL;
* set to true if any of the quals returned contain sublinks.
*/
void
-get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
- List **securityQuals, List **withCheckOptions,
- bool *hasRowSecurity, bool *hasSubLinks)
+get_row_security_policies(Query* root, CmdType commandType, RangeTblEntry* rte,
+ int rt_index, List **securityQuals,
+ List **withCheckOptions, bool *hasRowSecurity,
+ bool *hasSubLinks)
{
Expr *rowsec_expr = NULL;
Expr *rowsec_with_check_expr = NULL;
@@ -159,7 +160,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
/* Grab the built-in policies which should be applied to this relation. */
rel = heap_open(rte->relid, NoLock);
- rowsec_policies = pull_row_security_policies(root->commandType, rel,
+ rowsec_policies = pull_row_security_policies(commandType, rel,
user_id);
/*
@@ -201,7 +202,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
*/
if (row_security_policy_hook_restrictive)
{
- hook_policies_restrictive = (*row_security_policy_hook_restrictive)(root->commandType, rel);
+ hook_policies_restrictive = (*row_security_policy_hook_restrictive)(commandType, rel);
/* Build the expression from any policies returned. */
if (hook_policies_restrictive != NIL)
@@ -214,7 +215,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
if (row_security_policy_hook_permissive)
{
- hook_policies_permissive = (*row_security_policy_hook_permissive)(root->commandType, rel);
+ hook_policies_permissive = (*row_security_policy_hook_permissive)(commandType, rel);
/* Build the expression from any policies returned. */
if (hook_policies_permissive != NIL)
@@ -242,7 +243,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
* WITH CHECK policy (this will be a copy of the USING policy, if no
* explicit WITH CHECK policy exists).
*/
- if (root->commandType == CMD_INSERT || root->commandType == CMD_UPDATE)
+ if (commandType == CMD_INSERT || commandType == CMD_UPDATE)
{
/*
* WITH CHECK OPTIONS wants a WCO node which wraps each Expr, so
@@ -259,7 +260,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
WithCheckOption *wco;
wco = (WithCheckOption *) makeNode(WithCheckOption);
- wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
+ wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
WCO_RLS_UPDATE_CHECK;
wco->relname = pstrdup(RelationGetRelationName(rel));
wco->qual = (Node *) hook_with_check_expr_restrictive;
@@ -276,7 +277,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
WithCheckOption *wco;
wco = (WithCheckOption *) makeNode(WithCheckOption);
- wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
+ wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
WCO_RLS_UPDATE_CHECK;
wco->relname = pstrdup(RelationGetRelationName(rel));
wco->qual = (Node *) rowsec_with_check_expr;
@@ -289,7 +290,7 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
WithCheckOption *wco;
wco = (WithCheckOption *) makeNode(WithCheckOption);
- wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
+ wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
WCO_RLS_UPDATE_CHECK;
wco->relname = pstrdup(RelationGetRelationName(rel));
wco->qual = (Node *) hook_with_check_expr_permissive;
@@ -312,19 +313,72 @@ get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
combined_qual_eval = makeBoolExpr(OR_EXPR, combined_quals, -1);
wco = (WithCheckOption *) makeNode(WithCheckOption);
- wco->kind = root->commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
+ wco->kind = commandType == CMD_INSERT ? WCO_RLS_INSERT_CHECK :
WCO_RLS_UPDATE_CHECK;
wco->relname = pstrdup(RelationGetRelationName(rel));
wco->qual = (Node *) combined_qual_eval;
wco->cascaded = false;
*withCheckOptions = lappend(*withCheckOptions, wco);
}
+
+ /*
+ * ON CONFLICT DO UPDATE has an RTE that is subject to both INSERT and
+ * UPDATE RLS enforcement. Those are enforced (as a special, distinct
+ * kind of WCO) on the target tuple.
+ *
+ * Make a second, recursive pass over the RTE for this, gathering
+ * UPDATE-applicable RLS checks/WCOs, and gathering and converting
+ * UPDATE-applicable security quals into WCO_RLS_CONFLICT_CHECK RLS
+ * checks/WCOs. Finally, these distinct kinds of RLS checks/WCOs are
+ * concatenated with our own INSERT-applicable list.
+ */
+ if (root->onConflict && root->onConflict->action == ONCONFLICT_UPDATE &&
+ commandType == CMD_INSERT)
+ {
+ List *conflictSecurityQuals = NIL;
+ List *conflictWCOs = NIL;
+ ListCell *item;
+ bool conflictHasRowSecurity = false;
+ bool conflictHasSublinks = false;
+
+ /* Assume that RTE is target resultRelation */
+ get_row_security_policies(root, CMD_UPDATE, rte, rt_index,
+ &conflictSecurityQuals, &conflictWCOs,
+ &conflictHasRowSecurity,
+ &conflictHasSublinks);
+
+ if (conflictHasRowSecurity)
+ *hasRowSecurity = true;
+ if (conflictHasSublinks)
+ *hasSubLinks = true;
+
+ /*
+ * Append WITH CHECK OPTIONs/RLS checks, which should not conflict
+ * between this INSERT and the auxiliary UPDATE
+ */
+ *withCheckOptions = list_concat(*withCheckOptions,
+ conflictWCOs);
+
+ foreach(item, conflictSecurityQuals)
+ {
+ Expr *conflict_rowsec_expr = (Expr *) lfirst(item);
+ WithCheckOption *wco;
+
+ wco = (WithCheckOption *) makeNode(WithCheckOption);
+
+ wco->kind = WCO_RLS_CONFLICT_CHECK;
+ wco->relname = pstrdup(RelationGetRelationName(rel));
+ wco->qual = (Node *) copyObject(conflict_rowsec_expr);
+ wco->cascaded = false;
+ *withCheckOptions = lappend(*withCheckOptions, wco);
+ }
+ }
}
/* For SELECT, UPDATE, and DELETE, set the security quals */
- if (root->commandType == CMD_SELECT
- || root->commandType == CMD_UPDATE
- || root->commandType == CMD_DELETE)
+ if (commandType == CMD_SELECT
+ || commandType == CMD_UPDATE
+ || commandType == CMD_DELETE)
{
/* restrictive policies can simply be added to the list first */
if (hook_expr_restrictive)
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index d13a167334..c052949749 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -25,6 +25,24 @@
#include "utils/inval.h"
+/*
+ * Per-backend counter for generating speculative insertion tokens.
+ *
+ * This may wrap around, but that's OK as it's only used for the short
+ * duration between inserting a tuple and checking that there are no (unique)
+ * constraint violations. It's theoretically possible that a backend sees a
+ * tuple that was speculatively inserted by another backend, but before it has
+ * started waiting on the token, the other backend completes its insertion,
+ * and then then performs 2^32 unrelated insertions. And after all that, the
+ * first backend finally calls SpeculativeInsertionLockAcquire(), with the
+ * intention of waiting for the first insertion to complete, but ends up
+ * waiting for the latest unrelated insertion instead. Even then, nothing
+ * particularly bad happens: in the worst case they deadlock, causing one of
+ * the transactions to abort.
+ */
+static uint32 speculativeInsertionToken = 0;
+
+
/*
* Struct to hold context info for transaction lock waits.
*
@@ -575,6 +593,73 @@ ConditionalXactLockTableWait(TransactionId xid)
return true;
}
+/*
+ * SpeculativeInsertionLockAcquire
+ *
+ * Insert a lock showing that the given transaction ID is inserting a tuple,
+ * but hasn't yet decided whether it's going to keep it. The lock can then be
+ * used to wait for the decision to go ahead with the insertion, or aborting
+ * it.
+ *
+ * The token is used to distinguish multiple insertions by the same
+ * transaction. It is returned to caller.
+ */
+uint32
+SpeculativeInsertionLockAcquire(TransactionId xid)
+{
+ LOCKTAG tag;
+
+ speculativeInsertionToken++;
+
+ /*
+ * Check for wrap-around. Zero means no token is held, so don't use that.
+ */
+ if (speculativeInsertionToken == 0)
+ speculativeInsertionToken = 1;
+
+ SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken);
+
+ (void) LockAcquire(&tag, ExclusiveLock, false, false);
+
+ return speculativeInsertionToken;
+}
+
+/*
+ * SpeculativeInsertionLockRelease
+ *
+ * Delete the lock showing that the given transaction is speculatively
+ * inserting a tuple.
+ */
+void
+SpeculativeInsertionLockRelease(TransactionId xid)
+{
+ LOCKTAG tag;
+
+ SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, speculativeInsertionToken);
+
+ LockRelease(&tag, ExclusiveLock, false);
+}
+
+/*
+ * SpeculativeInsertionWait
+ *
+ * Wait for the specified transaction to finish or abort the insertion of a
+ * tuple.
+ */
+void
+SpeculativeInsertionWait(TransactionId xid, uint32 token)
+{
+ LOCKTAG tag;
+
+ SET_LOCKTAG_SPECULATIVE_INSERTION(tag, xid, token);
+
+ Assert(TransactionIdIsValid(xid));
+ Assert(token != 0);
+
+ (void) LockAcquire(&tag, ShareLock, false, false);
+ LockRelease(&tag, ShareLock, false);
+}
+
/*
* XactLockTableWaitErrorContextCb
* Error context callback for transaction lock waits.
@@ -873,6 +958,12 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
tag->locktag_field1,
tag->locktag_field2);
break;
+ case LOCKTAG_SPECULATIVE_TOKEN:
+ appendStringInfo(buf,
+ _("speculative token %u of transaction %u"),
+ tag->locktag_field2,
+ tag->locktag_field1);
+ break;
case LOCKTAG_OBJECT:
appendStringInfo(buf,
_("object %u of class %u of database %u"),
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 9c14e8abdf..bcffd85754 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -202,8 +202,14 @@ ProcessQuery(PlannedStmt *plan,
lastOid = queryDesc->estate->es_lastoid;
else
lastOid = InvalidOid;
- snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
- "INSERT %u %u", lastOid, queryDesc->estate->es_processed);
+ if (plan->isUpsert)
+ snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
+ "UPSERT %u %u",
+ lastOid, queryDesc->estate->es_processed);
+ else
+ snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
+ "INSERT %u %u",
+ lastOid, queryDesc->estate->es_processed);
break;
case CMD_UPDATE:
snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
@@ -1356,7 +1362,10 @@ PortalRunMulti(Portal portal, bool isTopLevel,
* 0" here because technically there is no query of the matching tag type,
* and printing a non-zero count for a different query type seems wrong,
* e.g. an INSERT that does an UPDATE instead should not print "0 1" if
- * one row was updated. See QueryRewrite(), step 3, for details.
+ * one row was updated (unless the ON CONFLICT DO UPDATE, or "UPSERT"
+ * variant of INSERT was used to update the row, where it's logically a
+ * direct effect of the top level command). See QueryRewrite(), step 3,
+ * for details.
*/
if (completionTag && completionTag[0] == '\0')
{
@@ -1366,6 +1375,8 @@ PortalRunMulti(Portal portal, bool isTopLevel,
sprintf(completionTag, "SELECT 0 0");
else if (strcmp(completionTag, "INSERT") == 0)
strcpy(completionTag, "INSERT 0 0");
+ else if (strcmp(completionTag, "UPSERT") == 0)
+ strcpy(completionTag, "UPSERT 0 0");
else if (strcmp(completionTag, "UPDATE") == 0)
strcpy(completionTag, "UPDATE 0");
else if (strcmp(completionTag, "DELETE") == 0)
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index 491824dd6b..9d53a8b6a3 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -29,6 +29,7 @@ static const char *const LockTagTypeNames[] = {
"tuple",
"transactionid",
"virtualxid",
+ "speculative token",
"object",
"userlock",
"advisory"
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 69267bdb91..4b3cd85ad9 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -354,6 +354,9 @@ static void get_select_query_def(Query *query, deparse_context *context,
TupleDesc resultDesc);
static void get_insert_query_def(Query *query, deparse_context *context);
static void get_update_query_def(Query *query, deparse_context *context);
+static void get_update_query_targetlist_def(Query *query, List *targetList,
+ deparse_context *context,
+ RangeTblEntry *rte);
static void get_delete_query_def(Query *query, deparse_context *context);
static void get_utility_query_def(Query *query, deparse_context *context);
static void get_basic_select_query(Query *query, deparse_context *context,
@@ -3846,15 +3849,23 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps)
* For a SubqueryScan, pretend the subplan is INNER referent. (We don't
* use OUTER because that could someday conflict with the normal meaning.)
* Likewise, for a CteScan, pretend the subquery's plan is INNER referent.
+ * For ON CONFLICT .. UPDATE we just need the inner tlist to point to the
+ * excluded expression's tlist. (Similar to the SubqueryScan we don't want
+ * to reuse OUTER, it's used for RETURNING in some modify table cases,
+ * although not INSERT .. CONFLICT).
*/
if (IsA(ps, SubqueryScanState))
dpns->inner_planstate = ((SubqueryScanState *) ps)->subplan;
else if (IsA(ps, CteScanState))
dpns->inner_planstate = ((CteScanState *) ps)->cteplanstate;
+ else if (IsA(ps, ModifyTableState))
+ dpns->inner_planstate = ps;
else
dpns->inner_planstate = innerPlanState(ps);
- if (dpns->inner_planstate)
+ if (IsA(ps, ModifyTableState))
+ dpns->inner_tlist = ((ModifyTableState *) ps)->mt_excludedtlist;
+ else if (dpns->inner_planstate)
dpns->inner_tlist = dpns->inner_planstate->plan->targetlist;
else
dpns->inner_tlist = NIL;
@@ -5302,6 +5313,32 @@ get_insert_query_def(Query *query, deparse_context *context)
appendStringInfoString(buf, "DEFAULT VALUES");
}
+ /* Add ON CONFLICT if present */
+ if (query->onConflict)
+ {
+ OnConflictExpr *confl = query->onConflict;
+
+ if (confl->action == ONCONFLICT_NOTHING)
+ {
+ appendStringInfoString(buf, " ON CONFLICT DO NOTHING");
+ }
+ else
+ {
+ appendStringInfoString(buf, " ON CONFLICT DO UPDATE SET ");
+ /* Deparse targetlist */
+ get_update_query_targetlist_def(query, confl->onConflictSet,
+ context, rte);
+
+ /* Add a WHERE clause if given */
+ if (confl->onConflictWhere != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(confl->onConflictWhere, context, false);
+ }
+ }
+ }
+
/* Add RETURNING if present */
if (query->returningList)
{
@@ -5321,12 +5358,6 @@ get_update_query_def(Query *query, deparse_context *context)
{
StringInfo buf = context->buf;
RangeTblEntry *rte;
- List *ma_sublinks;
- ListCell *next_ma_cell;
- SubLink *cur_ma_sublink;
- int remaining_ma_columns;
- const char *sep;
- ListCell *l;
/* Insert the WITH clause if given */
get_with_clause(query, context);
@@ -5349,6 +5380,46 @@ get_update_query_def(Query *query, deparse_context *context)
quote_identifier(rte->alias->aliasname));
appendStringInfoString(buf, " SET ");
+ /* Deparse targetlist */
+ get_update_query_targetlist_def(query, query->targetList, context, rte);
+
+ /* Add the FROM clause if needed */
+ get_from_clause(query, " FROM ", context);
+
+ /* Add a WHERE clause if given */
+ if (query->jointree->quals != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(query->jointree->quals, context, false);
+ }
+
+ /* Add RETURNING if present */
+ if (query->returningList)
+ {
+ appendContextKeyword(context, " RETURNING",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_target_list(query->returningList, context, NULL);
+ }
+}
+
+
+/* ----------
+ * get_update_query_targetlist_def - Parse back an UPDATE targetlist
+ * ----------
+ */
+static void
+get_update_query_targetlist_def(Query *query, List *targetList,
+ deparse_context *context, RangeTblEntry *rte)
+{
+ StringInfo buf = context->buf;
+ ListCell *l;
+ ListCell *next_ma_cell;
+ int remaining_ma_columns;
+ const char *sep;
+ SubLink *cur_ma_sublink;
+ List *ma_sublinks;
+
/*
* Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
* into a list. We expect them to appear, in ID order, in resjunk tlist
@@ -5357,7 +5428,7 @@ get_update_query_def(Query *query, deparse_context *context)
ma_sublinks = NIL;
if (query->hasSubLinks) /* else there can't be any */
{
- foreach(l, query->targetList)
+ foreach(l, targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
@@ -5379,7 +5450,7 @@ get_update_query_def(Query *query, deparse_context *context)
/* Add the comma separated list of 'attname = value' */
sep = "";
- foreach(l, query->targetList)
+ foreach(l, targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
Node *expr;
@@ -5470,25 +5541,6 @@ get_update_query_def(Query *query, deparse_context *context)
get_rule_expr(expr, context, false);
}
-
- /* Add the FROM clause if needed */
- get_from_clause(query, " FROM ", context);
-
- /* Add a WHERE clause if given */
- if (query->jointree->quals != NULL)
- {
- appendContextKeyword(context, " WHERE ",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
- get_rule_expr(query->jointree->quals, context, false);
- }
-
- /* Add RETURNING if present */
- if (query->returningList)
- {
- appendContextKeyword(context, " RETURNING",
- -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
- get_target_list(query->returningList, context, NULL);
- }
}
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index a4a478d114..b4284d6d94 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -405,6 +405,13 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
}
}
}
+ /*
+ * An invalid Xmin can be left behind by a speculative insertion that
+ * is cancelled by super-deleting the tuple. We shouldn't see any of
+ * those in TOAST tables, but better safe than sorry.
+ */
+ else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple)))
+ return false;
}
/* otherwise assume the tuple is valid for TOAST. */
@@ -714,8 +721,11 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
* output argument to return the xids of concurrent xacts that affected the
* tuple. snapshot->xmin is set to the tuple's xmin if that is another
* transaction that's still in progress; or to InvalidTransactionId if the
- * tuple's xmin is committed good, committed dead, or my own xact. Similarly
- * for snapshot->xmax and the tuple's xmax.
+ * tuple's xmin is committed good, committed dead, or my own xact.
+ * Similarly for snapshot->xmax and the tuple's xmax. If the tuple was
+ * inserted speculatively, meaning that the inserter might still back down
+ * on the insertion without aborting the whole transaction, the associated
+ * token is also returned in snapshot->speculativeToken.
*/
bool
HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
@@ -727,6 +737,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
Assert(htup->t_tableOid != InvalidOid);
snapshot->xmin = snapshot->xmax = InvalidTransactionId;
+ snapshot->speculativeToken = 0;
if (!HeapTupleHeaderXminCommitted(tuple))
{
@@ -808,6 +819,20 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
}
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
{
+ /*
+ * Return the speculative token to caller. Caller can worry
+ * about xmax, since it requires a conclusively locked row
+ * version, and a concurrent update to this tuple is a conflict
+ * of its purposes.
+ */
+ if (HeapTupleHeaderIsSpeculative(tuple))
+ {
+ snapshot->speculativeToken =
+ HeapTupleHeaderGetSpeculativeToken(tuple);
+
+ Assert(snapshot->speculativeToken != 0);
+ }
+
snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
/* XXX shouldn't we fall through to look at xmax? */
return true; /* in insertion by other */
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index ff01368531..f4155f7877 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -894,9 +894,12 @@ PrintQueryResults(PGresult *results)
success = StoreQueryTuple(results);
else
success = PrintQueryTuples(results);
- /* if it's INSERT/UPDATE/DELETE RETURNING, also print status */
+ /*
+ * if it's INSERT/UPSERT/UPDATE/DELETE RETURNING, also print status
+ */
cmdstatus = PQcmdStatus(results);
if (strncmp(cmdstatus, "INSERT", 6) == 0 ||
+ strncmp(cmdstatus, "UPSERT", 6) == 0 ||
strncmp(cmdstatus, "UPDATE", 6) == 0 ||
strncmp(cmdstatus, "DELETE", 6) == 0)
PrintQueryStatus(results);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 888cce7a2d..49c8ca4d66 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -28,6 +28,7 @@
#define HEAP_INSERT_SKIP_WAL 0x0001
#define HEAP_INSERT_SKIP_FSM 0x0002
#define HEAP_INSERT_FROZEN 0x0004
+#define HEAP_INSERT_SPECULATIVE 0x0008
typedef struct BulkInsertStateData *BulkInsertState;
@@ -142,6 +143,8 @@ extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait,
HeapUpdateFailureData *hufd);
+extern void heap_finish_speculative(Relation relation, HeapTuple tuple);
+extern void heap_abort_speculative(Relation relation, HeapTuple tuple);
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
HeapTuple newtup,
CommandId cid, Snapshot crosscheck, bool wait,
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index f0f89dec0f..caa0f14f4b 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -34,7 +34,7 @@
#define XLOG_HEAP_UPDATE 0x20
/* 0x030 is free, was XLOG_HEAP_MOVE */
#define XLOG_HEAP_HOT_UPDATE 0x40
-/* 0x050 is free, was XLOG_HEAP_NEWPAGE */
+#define XLOG_HEAP_CONFIRM 0x50
#define XLOG_HEAP_LOCK 0x60
#define XLOG_HEAP_INPLACE 0x70
@@ -60,23 +60,43 @@
#define XLOG_HEAP2_NEW_CID 0x70
/*
- * xl_heap_* ->flag values, 8 bits are available.
+ * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available.
*/
/* PD_ALL_VISIBLE was cleared */
-#define XLOG_HEAP_ALL_VISIBLE_CLEARED (1<<0)
+#define XLH_INSERT_ALL_VISIBLE_CLEARED (1<<0)
+#define XLH_INSERT_LAST_IN_MULTI (1<<1)
+#define XLH_INSERT_IS_SPECULATIVE (1<<2)
+#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
+
+/*
+ * xl_heap_update flag values, 8 bits are available.
+ */
+/* PD_ALL_VISIBLE was cleared */
+#define XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED (1<<0)
/* PD_ALL_VISIBLE was cleared in the 2nd page */
-#define XLOG_HEAP_NEW_ALL_VISIBLE_CLEARED (1<<1)
-#define XLOG_HEAP_CONTAINS_OLD_TUPLE (1<<2)
-#define XLOG_HEAP_CONTAINS_OLD_KEY (1<<3)
-#define XLOG_HEAP_CONTAINS_NEW_TUPLE (1<<4)
-#define XLOG_HEAP_PREFIX_FROM_OLD (1<<5)
-#define XLOG_HEAP_SUFFIX_FROM_OLD (1<<6)
-/* last xl_heap_multi_insert record for one heap_multi_insert() call */
-#define XLOG_HEAP_LAST_MULTI_INSERT (1<<7)
+#define XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED (1<<1)
+#define XLH_UPDATE_CONTAINS_OLD_TUPLE (1<<2)
+#define XLH_UPDATE_CONTAINS_OLD_KEY (1<<3)
+#define XLH_UPDATE_CONTAINS_NEW_TUPLE (1<<4)
+#define XLH_UPDATE_PREFIX_FROM_OLD (1<<5)
+#define XLH_UPDATE_SUFFIX_FROM_OLD (1<<6)
/* convenience macro for checking whether any form of old tuple was logged */
-#define XLOG_HEAP_CONTAINS_OLD \
- (XLOG_HEAP_CONTAINS_OLD_TUPLE | XLOG_HEAP_CONTAINS_OLD_KEY)
+#define XLH_UPDATE_CONTAINS_OLD \
+ (XLH_UPDATE_CONTAINS_OLD_TUPLE | XLH_UPDATE_CONTAINS_OLD_KEY)
+
+/*
+ * xl_heap_delete flag values, 8 bits are available.
+ */
+/* PD_ALL_VISIBLE was cleared */
+#define XLH_DELETE_ALL_VISIBLE_CLEARED (1<<0)
+#define XLH_DELETE_CONTAINS_OLD_TUPLE (1<<1)
+#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
+#define XLH_DELETE_IS_SUPER (1<<3)
+
+/* convenience macro for checking whether any form of old tuple was logged */
+#define XLH_DELETE_CONTAINS_OLD \
+ (XLH_DELETE_CONTAINS_OLD_TUPLE | XLH_DELETE_CONTAINS_OLD_KEY)
/* This is what we need to know about delete */
typedef struct xl_heap_delete
@@ -243,6 +263,14 @@ typedef struct xl_heap_lock_updated
#define SizeOfHeapLockUpdated (offsetof(xl_heap_lock_updated, infobits_set) + sizeof(uint8))
+/* This is what we need to know about confirmation of speculative insertion */
+typedef struct xl_heap_confirm
+{
+ OffsetNumber offnum; /* confirmed tuple's offset on page */
+} xl_heap_confirm;
+
+#define SizeOfHeapConfirm (offsetof(xl_heap_confirm, offnum) + sizeof(OffsetNumber))
+
/* This is what we need to know about in-place update */
typedef struct xl_heap_inplace
{
diff --git a/src/include/access/hio.h b/src/include/access/hio.h
index b0140298b1..b3b91e70d5 100644
--- a/src/include/access/hio.h
+++ b/src/include/access/hio.h
@@ -36,7 +36,7 @@ typedef struct BulkInsertStateData
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
- HeapTuple tuple);
+ HeapTuple tuple, bool token);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
Buffer otherBuffer, int options,
BulkInsertState bistate,
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index 0a673cd526..80285acc3b 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -96,6 +96,15 @@
* unrelated tuple stored into a slot recently freed by VACUUM. If either
* check fails, one may assume that there is no live descendant version.
*
+ * t_ctid is sometimes used to store a speculative insertion token, instead
+ * of a real TID. A speculative token is set on a tuple that's being
+ * inserted, until the inserter is sure that it wants to go ahead with the
+ * insertion. Hence a token should only be seen on a tuple with an XMAX
+ * that's still in-progress, or invalid/aborted. The token is replaced with
+ * the tuple's real TID when the insertion is confirmed. One should never
+ * see a speculative insertion token while following a chain of t_ctid links,
+ * because they are not used on updates, only insertions.
+ *
* Following the fixed header fields, the nulls bitmap is stored (beginning
* at t_bits). The bitmap is *not* stored if t_infomask shows that there
* are no nulls in the tuple. If an OID field is present (as indicated by
@@ -138,7 +147,8 @@ struct HeapTupleHeaderData
DatumTupleFields t_datum;
} t_choice;
- ItemPointerData t_ctid; /* current TID of this or newer tuple */
+ ItemPointerData t_ctid; /* current TID of this or newer tuple (or a
+ * speculative insertion token) */
/* Fields below here must match MinimalTupleData! */
@@ -241,6 +251,14 @@ struct HeapTupleHeaderData
*/
#define HEAP_TUPLE_HAS_MATCH HEAP_ONLY_TUPLE /* tuple has a join match */
+/*
+ * Special value used in t_ctid.ip_posid, to indicate that it holds a
+ * speculative insertion token rather than a real TID. This must be higher
+ * than MaxOffsetNumber, so that it can be distinguished from a valid
+ * offset number in a regular item pointer.
+ */
+#define SpecTokenOffsetNumber 0xfffe
+
/*
* HeapTupleHeader accessor macros
*
@@ -377,6 +395,22 @@ do { \
(tup)->t_choice.t_heap.t_field3.t_xvac = (xid); \
} while (0)
+#define HeapTupleHeaderIsSpeculative(tup) \
+( \
+ (tup)->t_ctid.ip_posid == SpecTokenOffsetNumber \
+)
+
+#define HeapTupleHeaderGetSpeculativeToken(tup) \
+( \
+ AssertMacro(HeapTupleHeaderIsSpeculative(tup)), \
+ ItemPointerGetBlockNumber(&(tup)->t_ctid) \
+)
+
+#define HeapTupleHeaderSetSpeculativeToken(tup, token) \
+( \
+ ItemPointerSet(&(tup)->t_ctid, token, SpecTokenOffsetNumber) \
+)
+
#define HeapTupleHeaderGetDatumLength(tup) \
VARSIZE(tup)
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 3d2c5b2a29..969a8e3c35 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201505081
+#define CATALOG_VERSION_NO 201505082
#endif
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index a04def96e4..06f38202a5 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -81,6 +81,8 @@ extern void index_drop(Oid indexId, bool concurrent);
extern IndexInfo *BuildIndexInfo(Relation index);
+extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii);
+
extern void FormIndexDatum(IndexInfo *indexInfo,
TupleTableSlot *slot,
EState *estate,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 33c8fad844..1b68b54c7d 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -195,6 +195,7 @@ extern void ExecConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate);
extern void ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate);
+extern LockTupleMode ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo);
extern ExecRowMark *ExecFindRowMark(EState *estate, Index rti);
extern ExecAuxRowMark *ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist);
extern TupleTableSlot *EvalPlanQual(EState *estate, EPQState *epqstate,
@@ -361,16 +362,18 @@ extern void UnregisterExprContextCallback(ExprContext *econtext,
/*
* prototypes from functions in execIndexing.c
*/
-extern void ExecOpenIndices(ResultRelInfo *resultRelInfo);
+extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
extern List *ExecInsertIndexTuples(TupleTableSlot *slot, ItemPointer tupleid,
- EState *estate);
-extern bool check_exclusion_constraint(Relation heap, Relation index,
+ EState *estate, bool noDupErr, bool *specConflict,
+ List *arbiterIndexes);
+extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate,
+ ItemPointer conflictTid, List *arbiterIndexes);
+extern void check_exclusion_constraint(Relation heap, Relation index,
IndexInfo *indexInfo,
ItemPointer tupleid,
Datum *values, bool *isnull,
- EState *estate,
- bool newIndex, bool errorOK);
+ EState *estate, bool newIndex);
#endif /* EXECUTOR_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index dac542fbc1..210dbe4a53 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -41,6 +41,9 @@
* ExclusionOps Per-column exclusion operators, or NULL if none
* ExclusionProcs Underlying function OIDs for ExclusionOps
* ExclusionStrats Opclass strategy numbers for ExclusionOps
+ * UniqueOps Theses are like Exclusion*, but for unique indexes
+ * UniqueProcs
+ * UniqueStrats
* Unique is it a unique index?
* ReadyForInserts is it valid for inserts?
* Concurrent are we doing a concurrent index build?
@@ -62,6 +65,9 @@ typedef struct IndexInfo
Oid *ii_ExclusionOps; /* array with one entry per column */
Oid *ii_ExclusionProcs; /* array with one entry per column */
uint16 *ii_ExclusionStrats; /* array with one entry per column */
+ Oid *ii_UniqueOps; /* array with one entry per column */
+ Oid *ii_UniqueProcs; /* array with one entry per column */
+ uint16 *ii_UniqueStrats; /* array with one entry per column */
bool ii_Unique;
bool ii_ReadyForInserts;
bool ii_Concurrent;
@@ -308,6 +314,8 @@ typedef struct JunkFilter
* ConstraintExprs array of constraint-checking expr states
* junkFilter for removing junk attributes from tuples
* projectReturning for computing a RETURNING list
+ * onConflictSetProj for computing ON CONFLICT DO UPDATE SET
+ * onConflictSetWhere list of ON CONFLICT DO UPDATE exprs (qual)
* ----------------
*/
typedef struct ResultRelInfo
@@ -329,6 +337,8 @@ typedef struct ResultRelInfo
List **ri_ConstraintExprs;
JunkFilter *ri_junkFilter;
ProjectionInfo *ri_projectReturning;
+ ProjectionInfo *ri_onConflictSetProj;
+ List *ri_onConflictSetWhere;
} ResultRelInfo;
/* ----------------
@@ -1094,6 +1104,11 @@ typedef struct ModifyTableState
List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */
EPQState mt_epqstate; /* for evaluating EvalPlanQual rechecks */
bool fireBSTriggers; /* do we need to fire stmt triggers? */
+ OnConflictAction mt_onconflict; /* ON CONFLICT type */
+ List *mt_arbiterindexes; /* unique index OIDs to arbitrate taking alt path */
+ TupleTableSlot *mt_existing; /* slot to store existing target tuple in */
+ List *mt_excludedtlist; /* the excluded pseudo relation's tlist */
+ TupleTableSlot *mt_conflproj; /* FIXME*/
} ModifyTableState;
/* ----------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 8991f3f80c..768f413a45 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -168,10 +168,12 @@ typedef enum NodeTag
T_CoerceToDomainValue,
T_SetToDefault,
T_CurrentOfExpr,
+ T_InferenceElem,
T_TargetEntry,
T_RangeTblRef,
T_JoinExpr,
T_FromExpr,
+ T_OnConflictExpr,
T_IntoClause,
/*
@@ -413,6 +415,8 @@ typedef enum NodeTag
T_RowMarkClause,
T_XmlSerialize,
T_WithClause,
+ T_InferClause,
+ T_OnConflictClause,
T_CommonTableExpr,
T_RoleSpec,
@@ -626,4 +630,17 @@ typedef enum JoinType
(1 << JOIN_RIGHT) | \
(1 << JOIN_ANTI))) != 0)
+/*
+ * OnConflictAction -
+ * "ON CONFLICT" clause type of query
+ *
+ * This is needed in both parsenodes.h and plannodes.h, so put it here...
+ */
+typedef enum OnConflictAction
+{
+ ONCONFLICT_NONE, /* No "ON CONFLICT" clause */
+ ONCONFLICT_NOTHING, /* ON CONFLICT ... DO NOTHING */
+ ONCONFLICT_UPDATE /* ON CONFLICT ... DO UPDATE */
+} OnConflictAction;
+
#endif /* NODES_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 7d15ef2847..91ca9c6fd0 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -132,6 +132,8 @@ typedef struct Query
List *withCheckOptions; /* a list of WithCheckOption's */
+ OnConflictExpr *onConflict; /* ON CONFLICT DO [NOTHING | UPDATE] */
+
List *returningList; /* return-values list (of TargetEntry) */
List *groupClause; /* a list of SortGroupClause's */
@@ -591,7 +593,7 @@ typedef enum TableLikeOption
} TableLikeOption;
/*
- * IndexElem - index parameters (used in CREATE INDEX)
+ * IndexElem - index parameters (used in CREATE INDEX, and in ON CONFLICT)
*
* For a plain index attribute, 'name' is the name of the table column to
* index, and 'expr' is NULL. For an index expression, 'name' is NULL and
@@ -735,9 +737,9 @@ typedef struct XmlSerialize
* For SELECT/INSERT/UPDATE permissions, if the user doesn't have
* table-wide permissions then it is sufficient to have the permissions
* on all columns identified in selectedCols (for SELECT) and/or
- * insertedCols and/or updatedCols (INSERT with ON CONFLICT UPDATE may
- * have all 3). selectedCols, insertedCols and updatedCols are
- * bitmapsets, which cannot have negative integer members, so we subtract
+ * insertedCols and/or updatedCols (INSERT with ON CONFLICT DO UPDATE may
+ * have all 3). selectedCols, insertedCols and updatedCols are bitmapsets,
+ * which cannot have negative integer members, so we subtract
* FirstLowInvalidHeapAttributeNumber from column numbers before storing
* them in these fields. A whole-row Var reference is represented by
* setting the bit for InvalidAttrNumber.
@@ -881,7 +883,8 @@ typedef enum WCOKind
{
WCO_VIEW_CHECK, /* WCO on an auto-updatable view */
WCO_RLS_INSERT_CHECK, /* RLS INSERT WITH CHECK policy */
- WCO_RLS_UPDATE_CHECK /* RLS UPDATE WITH CHECK policy */
+ WCO_RLS_UPDATE_CHECK, /* RLS UPDATE WITH CHECK policy */
+ WCO_RLS_CONFLICT_CHECK /* RLS ON CONFLICT DO UPDATE USING policy */
} WCOKind;
typedef struct WithCheckOption
@@ -1025,6 +1028,37 @@ typedef struct WithClause
int location; /* token location, or -1 if unknown */
} WithClause;
+/*
+ * InferClause -
+ * ON CONFLICT unique index inference clause
+ *
+ * Note: InferClause does not propagate into the Query representation.
+ */
+typedef struct InferClause
+{
+ NodeTag type;
+ List *indexElems; /* IndexElems to infer unique index */
+ Node *whereClause; /* qualification (partial-index predicate) */
+ char *conname; /* Constraint name, or NULL if unnamed */
+ int location; /* token location, or -1 if unknown */
+} InferClause;
+
+/*
+ * OnConflictClause -
+ * representation of ON CONFLICT clause
+ *
+ * Note: OnConflictClause does not propagate into the Query representation.
+ */
+typedef struct OnConflictClause
+{
+ NodeTag type;
+ OnConflictAction action; /* DO NOTHING or UPDATE? */
+ InferClause *infer; /* Optional index inference clause */
+ List *targetList; /* the target list (of ResTarget) */
+ Node *whereClause; /* qualifications */
+ int location; /* token location, or -1 if unknown */
+} OnConflictClause;
+
/*
* CommonTableExpr -
* representation of WITH list element
@@ -1075,6 +1109,7 @@ typedef struct InsertStmt
RangeVar *relation; /* relation to insert into */
List *cols; /* optional: names of the target columns */
Node *selectStmt; /* the source SELECT/VALUES, or NULL */
+ OnConflictClause *onConflictClause; /* ON CONFLICT clause */
List *returningList; /* list of expressions to return */
WithClause *withClause; /* WITH clause */
} InsertStmt;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index baeba2d330..c63492fa0b 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -45,6 +45,8 @@ typedef struct PlannedStmt
bool hasModifyingCTE; /* has insert|update|delete in WITH? */
+ bool isUpsert; /* is it insert ... ON CONFLICT UPDATE? */
+
bool canSetTag; /* do I set the command result tag? */
bool transientPlan; /* redo plan when TransactionXmin changes? */
@@ -183,6 +185,12 @@ typedef struct ModifyTable
List *fdwPrivLists; /* per-target-table FDW private data lists */
List *rowMarks; /* PlanRowMarks (non-locking only) */
int epqParam; /* ID of Param for EvalPlanQual re-eval */
+ OnConflictAction onConflictAction; /* ON CONFLICT action */
+ List *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs */
+ List *onConflictSet; /* SET for INSERT ON CONFLICT DO UPDATE */
+ Node *onConflictWhere;/* WHERE for ON CONFLICT UPDATE */
+ Index exclRelRTI; /* RTI of the EXCLUDED pseudo relation */
+ List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
} ModifyTable;
/* ----------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 4f1d234d30..8f2c64847e 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1143,6 +1143,22 @@ typedef struct CurrentOfExpr
int cursor_param; /* refcursor parameter number, or 0 */
} CurrentOfExpr;
+/*
+ * InferenceElem - an element of a unique index inference specification
+ *
+ * This mostly matches the structure of IndexElems, but having a dedicated
+ * primnode allows for a clean separation between the use of index parameters
+ * by utility commands, and this node.
+ */
+typedef struct InferenceElem
+{
+ Expr xpr;
+ Node *expr; /* expression to infer from, or NULL */
+ Oid infercollid; /* OID of collation, or InvalidOid */
+ Oid inferopfamily; /* OID of att opfamily, or InvalidOid */
+ Oid inferopcinputtype; /* OID of att input type, or InvalidOid */
+} InferenceElem;
+
/*--------------------
* TargetEntry -
* a target entry (used in query target lists)
@@ -1307,4 +1323,30 @@ typedef struct FromExpr
Node *quals; /* qualifiers on join, if any */
} FromExpr;
+/*----------
+ * OnConflictExpr - represents an ON CONFLICT DO ... expression
+ *
+ * The optimizer requires a list of inference elements, and optionally a WHERE
+ * clause to infer a unique index. The unique index (or, occasionally,
+ * indexes) inferred are used to arbitrate whether or not the alternative ON
+ * CONFLICT path is taken.
+ *----------
+ */
+typedef struct OnConflictExpr
+{
+ NodeTag type;
+ OnConflictAction action; /* DO NOTHING or UPDATE? */
+
+ /* Arbiter */
+ List *arbiterElems; /* unique index arbiter list (of InferenceElem's) */
+ Node *arbiterWhere; /* unique index arbiter WHERE clause */
+ Oid constraint; /* pg_constraint OID for arbiter */
+
+ /* ON CONFLICT UPDATE */
+ List *onConflictSet; /* List of ON CONFLICT SET TargetEntrys */
+ Node *onConflictWhere;/* qualifiers to restrict UPDATE to */
+ int exclRelIndex; /* RT index of 'excluded' relation */
+ List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
+} OnConflictExpr;
+
#endif /* PRIMNODES_H */
diff --git a/src/include/optimizer/plancat.h b/src/include/optimizer/plancat.h
index 8eb2e57d7b..11e7d4d26b 100644
--- a/src/include/optimizer/plancat.h
+++ b/src/include/optimizer/plancat.h
@@ -28,6 +28,8 @@ extern PGDLLIMPORT get_relation_info_hook_type get_relation_info_hook;
extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel);
+extern List *infer_arbiter_indexes(PlannerInfo *root);
+
extern void estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples, double *allvisfrac);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 0c8cbcded9..1d4ab0488e 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -86,7 +86,7 @@ extern ModifyTable *make_modifytable(PlannerInfo *root,
Index nominalRelation,
List *resultRelations, List *subplans,
List *withCheckOptionLists, List *returningLists,
- List *rowMarks, int epqParam);
+ List *rowMarks, OnConflictExpr *onconflict, int epqParam);
extern bool is_projection_capable_plan(Plan *plan);
/*
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
index 05e46c5b78..dcd078ee43 100644
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -45,6 +45,9 @@ extern void expand_security_quals(PlannerInfo *root, List *tlist);
*/
extern List *preprocess_targetlist(PlannerInfo *root, List *tlist);
+extern List *preprocess_onconflict_targetlist(List *tlist,
+ int result_relation, List *range_table);
+
extern PlanRowMark *get_plan_rowmark(List *rowmarks, Index rtindex);
/*
diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h
index 5b1ee15424..faea99108c 100644
--- a/src/include/parser/kwlist.h
+++ b/src/include/parser/kwlist.h
@@ -87,6 +87,7 @@ PG_KEYWORD("commit", COMMIT, UNRESERVED_KEYWORD)
PG_KEYWORD("committed", COMMITTED, UNRESERVED_KEYWORD)
PG_KEYWORD("concurrently", CONCURRENTLY, TYPE_FUNC_NAME_KEYWORD)
PG_KEYWORD("configuration", CONFIGURATION, UNRESERVED_KEYWORD)
+PG_KEYWORD("conflict", CONFLICT, UNRESERVED_KEYWORD)
PG_KEYWORD("connection", CONNECTION, UNRESERVED_KEYWORD)
PG_KEYWORD("constraint", CONSTRAINT, RESERVED_KEYWORD)
PG_KEYWORD("constraints", CONSTRAINTS, UNRESERVED_KEYWORD)
diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h
index 6a4438f556..f1b7d3d896 100644
--- a/src/include/parser/parse_clause.h
+++ b/src/include/parser/parse_clause.h
@@ -41,6 +41,10 @@ extern List *transformDistinctClause(ParseState *pstate,
List **targetlist, List *sortClause, bool is_agg);
extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist,
List **targetlist, List *sortClause);
+extern void transformOnConflictArbiter(ParseState *pstate,
+ OnConflictClause *onConflictClause,
+ List **arbiterExpr, Node **arbiterWhere,
+ Oid *constraint);
extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle,
List *sortlist, List *targetlist, SortBy *sortby,
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index 6a5528a734..928b1ca170 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -43,6 +43,11 @@ typedef struct ReorderBufferTupleBuf
* and ComboCids in the same list with the user visible INSERT/UPDATE/DELETE
* changes. Users of the decoding facilities will never see changes with
* *_INTERNAL_* actions.
+ *
+ * The INTERNAL_SPEC_INSERT and INTERNAL_SPEC_CONFIRM changes concern
+ * "speculative insertions", and their confirmation respectively. They're
+ * used by INSERT .. ON CONFLICT .. UPDATE. Users of logical decoding don't
+ * have to care about these.
*/
enum ReorderBufferChangeType
{
@@ -51,7 +56,9 @@ enum ReorderBufferChangeType
REORDER_BUFFER_CHANGE_DELETE,
REORDER_BUFFER_CHANGE_INTERNAL_SNAPSHOT,
REORDER_BUFFER_CHANGE_INTERNAL_COMMAND_ID,
- REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID
+ REORDER_BUFFER_CHANGE_INTERNAL_TUPLECID,
+ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT,
+ REORDER_BUFFER_CHANGE_INTERNAL_SPEC_CONFIRM
};
/*
diff --git a/src/include/rewrite/rowsecurity.h b/src/include/rewrite/rowsecurity.h
index 115c9a8e43..eb4b20559f 100644
--- a/src/include/rewrite/rowsecurity.h
+++ b/src/include/rewrite/rowsecurity.h
@@ -41,7 +41,8 @@ extern PGDLLIMPORT row_security_policy_hook_type row_security_policy_hook_permis
extern PGDLLIMPORT row_security_policy_hook_type row_security_policy_hook_restrictive;
-extern void get_row_security_policies(Query* root, RangeTblEntry* rte, int rt_index,
+extern void get_row_security_policies(Query *root, CmdType commandType,
+ RangeTblEntry *rte, int rt_index,
List **securityQuals, List **withCheckOptions,
bool *hasRowSecurity, bool *hasSubLinks);
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index f5d70e5141..7cc75fc106 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -76,6 +76,11 @@ extern bool ConditionalXactLockTableWait(TransactionId xid);
extern void WaitForLockers(LOCKTAG heaplocktag, LOCKMODE lockmode);
extern void WaitForLockersMultiple(List *locktags, LOCKMODE lockmode);
+/* Lock an XID for tuple insertion (used to wait for an insertion to finish) */
+extern uint32 SpeculativeInsertionLockAcquire(TransactionId xid);
+extern void SpeculativeInsertionLockRelease(TransactionId xid);
+extern void SpeculativeInsertionWait(TransactionId xid, uint32 token);
+
/* Lock a general object (other than a relation) of the current database */
extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index dae517f3fe..b4eb1b4a9e 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -176,6 +176,8 @@ typedef enum LockTagType
/* ID info for a transaction is its TransactionId */
LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
/* ID info for a virtual transaction is its VirtualTransactionId */
+ LOCKTAG_SPECULATIVE_TOKEN, /* speculative insertion Xid and token */
+ /* ID info for a transaction is its TransactionId */
LOCKTAG_OBJECT, /* non-relation database object */
/* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */
@@ -261,6 +263,14 @@ typedef struct LOCKTAG
(locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+#define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \
+ ((locktag).locktag_field1 = (xid), \
+ (locktag).locktag_field2 = (token), \
+ (locktag).locktag_field3 = 0, \
+ (locktag).locktag_field4 = 0, \
+ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \
+ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (classoid), \
diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h
index 26fb2573c7..a734bf0075 100644
--- a/src/include/utils/snapshot.h
+++ b/src/include/utils/snapshot.h
@@ -69,31 +69,41 @@ typedef struct SnapshotData
* progress, unless the snapshot was taken during recovery in which case
* it's empty. For historic MVCC snapshots, the meaning is inverted, i.e.
* it contains *committed* transactions between xmin and xmax.
+ *
+ * note: all ids in xip[] satisfy xmin <= xip[i] < xmax
*/
TransactionId *xip;
uint32 xcnt; /* # of xact ids in xip[] */
- /* note: all ids in xip[] satisfy xmin <= xip[i] < xmax */
- int32 subxcnt; /* # of xact ids in subxip[] */
/*
* For non-historic MVCC snapshots, this contains subxact IDs that are in
* progress (and other transactions that are in progress if taken during
* recovery). For historic snapshot it contains *all* xids assigned to the
* replayed transaction, including the toplevel xid.
+ *
+ * note: all ids in subxip[] are >= xmin, but we don't bother filtering
+ * out any that are >= xmax
*/
TransactionId *subxip;
+ int32 subxcnt; /* # of xact ids in subxip[] */
bool suboverflowed; /* has the subxip array overflowed? */
+
bool takenDuringRecovery; /* recovery-shaped snapshot? */
bool copied; /* false if it's a static snapshot */
+ CommandId curcid; /* in my xact, CID < curcid are visible */
+
/*
- * note: all ids in subxip[] are >= xmin, but we don't bother filtering
- * out any that are >= xmax
+ * An extra return value for HeapTupleSatisfiesDirty, not used in MVCC
+ * snapshots.
+ */
+ uint32 speculativeToken;
+
+ /*
+ * Book-keeping information, used by the snapshot manager
*/
- CommandId curcid; /* in my xact, CID < curcid are visible */
uint32 active_count; /* refcount on ActiveSnapshot stack */
uint32 regd_count; /* refcount on RegisteredSnapshots */
-
pairingheap_node ph_node; /* link in the RegisteredSnapshots heap */
} SnapshotData;
diff --git a/src/test/isolation/expected/insert-conflict-do-nothing.out b/src/test/isolation/expected/insert-conflict-do-nothing.out
new file mode 100644
index 0000000000..0a0958f034
--- /dev/null
+++ b/src/test/isolation/expected/insert-conflict-do-nothing.out
@@ -0,0 +1,23 @@
+Parsed test spec with 2 sessions
+
+starting permutation: donothing1 donothing2 c1 select2 c2
+step donothing1: INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING;
+step donothing2: INSERT INTO ints(key, val) VALUES(1, 'donothing2') ON CONFLICT DO NOTHING;
+step c1: COMMIT;
+step donothing2: <... completed>
+step select2: SELECT * FROM ints;
+key val
+
+1 donothing1
+step c2: COMMIT;
+
+starting permutation: donothing1 donothing2 a1 select2 c2
+step donothing1: INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING;
+step donothing2: INSERT INTO ints(key, val) VALUES(1, 'donothing2') ON CONFLICT DO NOTHING;
+step a1: ABORT;
+step donothing2: <... completed>
+step select2: SELECT * FROM ints;
+key val
+
+1 donothing2
+step c2: COMMIT;
diff --git a/src/test/isolation/expected/insert-conflict-do-update-2.out b/src/test/isolation/expected/insert-conflict-do-update-2.out
new file mode 100644
index 0000000000..05fb06f8d8
--- /dev/null
+++ b/src/test/isolation/expected/insert-conflict-do-update-2.out
@@ -0,0 +1,23 @@
+Parsed test spec with 2 sessions
+
+starting permutation: insert1 insert2 c1 select2 c2
+step insert1: INSERT INTO upsert(key, payload) VALUES('FooFoo', 'insert1') ON CONFLICT (lower(key)) DO UPDATE set key = EXCLUDED.key, payload = upsert.payload || ' updated by insert1';
+step insert2: INSERT INTO upsert(key, payload) VALUES('FOOFOO', 'insert2') ON CONFLICT (lower(key)) DO UPDATE set key = EXCLUDED.key, payload = upsert.payload || ' updated by insert2';
+step c1: COMMIT;
+step insert2: <... completed>
+step select2: SELECT * FROM upsert;
+key payload
+
+FOOFOO insert1 updated by insert2
+step c2: COMMIT;
+
+starting permutation: insert1 insert2 a1 select2 c2
+step insert1: INSERT INTO upsert(key, payload) VALUES('FooFoo', 'insert1') ON CONFLICT (lower(key)) DO UPDATE set key = EXCLUDED.key, payload = upsert.payload || ' updated by insert1';
+step insert2: INSERT INTO upsert(key, payload) VALUES('FOOFOO', 'insert2') ON CONFLICT (lower(key)) DO UPDATE set key = EXCLUDED.key, payload = upsert.payload || ' updated by insert2';
+step a1: ABORT;
+step insert2: <... completed>
+step select2: SELECT * FROM upsert;
+key payload
+
+FOOFOO insert2
+step c2: COMMIT;
diff --git a/src/test/isolation/expected/insert-conflict-do-update-3.out b/src/test/isolation/expected/insert-conflict-do-update-3.out
new file mode 100644
index 0000000000..6600410618
--- /dev/null
+++ b/src/test/isolation/expected/insert-conflict-do-update-3.out
@@ -0,0 +1,26 @@
+Parsed test spec with 2 sessions
+
+starting permutation: update2 insert1 c2 select1surprise c1
+step update2: UPDATE colors SET is_active = true WHERE key = 1;
+step insert1:
+ WITH t AS (
+ INSERT INTO colors(key, color, is_active)
+ VALUES(1, 'Brown', true), (2, 'Gray', true)
+ ON CONFLICT (key) DO UPDATE
+ SET color = EXCLUDED.color
+ WHERE colors.is_active)
+ SELECT * FROM colors ORDER BY key;
+step c2: COMMIT;
+step insert1: <... completed>
+key color is_active
+
+1 Red f
+2 Green f
+3 Blue f
+step select1surprise: SELECT * FROM colors ORDER BY key;
+key color is_active
+
+1 Brown t
+2 Green f
+3 Blue f
+step c1: COMMIT;
diff --git a/src/test/isolation/expected/insert-conflict-do-update.out b/src/test/isolation/expected/insert-conflict-do-update.out
new file mode 100644
index 0000000000..a634918784
--- /dev/null
+++ b/src/test/isolation/expected/insert-conflict-do-update.out
@@ -0,0 +1,23 @@
+Parsed test spec with 2 sessions
+
+starting permutation: insert1 insert2 c1 select2 c2
+step insert1: INSERT INTO upsert(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO UPDATE set val = upsert.val || ' updated by insert1';
+step insert2: INSERT INTO upsert(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO UPDATE set val = upsert.val || ' updated by insert2';
+step c1: COMMIT;
+step insert2: <... completed>
+step select2: SELECT * FROM upsert;
+key val
+
+1 insert1 updated by insert2
+step c2: COMMIT;
+
+starting permutation: insert1 insert2 a1 select2 c2
+step insert1: INSERT INTO upsert(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO UPDATE set val = upsert.val || ' updated by insert1';
+step insert2: INSERT INTO upsert(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO UPDATE set val = upsert.val || ' updated by insert2';
+step a1: ABORT;
+step insert2: <... completed>
+step select2: SELECT * FROM upsert;
+key val
+
+1 insert2
+step c2: COMMIT;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 3e2614ecac..c0ed637cd2 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -16,6 +16,10 @@ test: fk-deadlock2
test: eval-plan-qual
test: lock-update-delete
test: lock-update-traversal
+test: insert-conflict-do-nothing
+test: insert-conflict-do-update
+test: insert-conflict-do-update-2
+test: insert-conflict-do-update-3
test: delete-abort-savept
test: delete-abort-savept-2
test: aborted-keyrevoke
diff --git a/src/test/isolation/specs/insert-conflict-do-nothing.spec b/src/test/isolation/specs/insert-conflict-do-nothing.spec
new file mode 100644
index 0000000000..9b92c35cec
--- /dev/null
+++ b/src/test/isolation/specs/insert-conflict-do-nothing.spec
@@ -0,0 +1,41 @@
+# INSERT...ON CONFLICT DO NOTHING test
+#
+# This test tries to expose problems with the interaction between concurrent
+# sessions during INSERT...ON CONFLICT DO NOTHING.
+#
+# The convention here is that session 1 always ends up inserting, and session 2
+# always ends up doing nothing.
+
+setup
+{
+ CREATE TABLE ints (key int primary key, val text);
+}
+
+teardown
+{
+ DROP TABLE ints;
+}
+
+session "s1"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "donothing1" { INSERT INTO ints(key, val) VALUES(1, 'donothing1') ON CONFLICT DO NOTHING; }
+step "c1" { COMMIT; }
+step "a1" { ABORT; }
+
+session "s2"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "donothing2" { INSERT INTO ints(key, val) VALUES(1, 'donothing2') ON CONFLICT DO NOTHING; }
+step "select2" { SELECT * FROM ints; }
+step "c2" { COMMIT; }
+step "a2" { ABORT; }
+
+# Regular case where one session block-waits on another to determine if it
+# should proceed with an insert or do nothing.
+permutation "donothing1" "donothing2" "c1" "select2" "c2"
+permutation "donothing1" "donothing2" "a1" "select2" "c2"
diff --git a/src/test/isolation/specs/insert-conflict-do-update-2.spec b/src/test/isolation/specs/insert-conflict-do-update-2.spec
new file mode 100644
index 0000000000..cd7e3f42fe
--- /dev/null
+++ b/src/test/isolation/specs/insert-conflict-do-update-2.spec
@@ -0,0 +1,41 @@
+# INSERT...ON CONFLICT DO UPDATE test
+#
+# This test shows a plausible scenario in which the user might wish to UPDATE a
+# value that is also constrained by the unique index that is the arbiter of
+# whether the alternative path should be taken.
+
+setup
+{
+ CREATE TABLE upsert (key text not null, payload text);
+ CREATE UNIQUE INDEX ON upsert(lower(key));
+}
+
+teardown
+{
+ DROP TABLE upsert;
+}
+
+session "s1"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "insert1" { INSERT INTO upsert(key, payload) VALUES('FooFoo', 'insert1') ON CONFLICT (lower(key)) DO UPDATE set key = EXCLUDED.key, payload = upsert.payload || ' updated by insert1'; }
+step "c1" { COMMIT; }
+step "a1" { ABORT; }
+
+session "s2"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "insert2" { INSERT INTO upsert(key, payload) VALUES('FOOFOO', 'insert2') ON CONFLICT (lower(key)) DO UPDATE set key = EXCLUDED.key, payload = upsert.payload || ' updated by insert2'; }
+step "select2" { SELECT * FROM upsert; }
+step "c2" { COMMIT; }
+step "a2" { ABORT; }
+
+# One session (session 2) block-waits on another (session 1) to determine if it
+# should proceed with an insert or update. The user can still usefully UPDATE
+# a column constrained by a unique index, as the example illustrates.
+permutation "insert1" "insert2" "c1" "select2" "c2"
+permutation "insert1" "insert2" "a1" "select2" "c2"
diff --git a/src/test/isolation/specs/insert-conflict-do-update-3.spec b/src/test/isolation/specs/insert-conflict-do-update-3.spec
new file mode 100644
index 0000000000..e282c3beca
--- /dev/null
+++ b/src/test/isolation/specs/insert-conflict-do-update-3.spec
@@ -0,0 +1,69 @@
+# INSERT...ON CONFLICT DO UPDATE test
+#
+# Other INSERT...ON CONFLICT DO UPDATE isolation tests illustrate the "MVCC
+# violation" added to facilitate the feature, whereby a
+# not-visible-to-our-snapshot tuple can be updated by our command all the same.
+# This is generally needed to provide a guarantee of a successful INSERT or
+# UPDATE in READ COMMITTED mode. This MVCC violation is quite distinct from
+# the putative "MVCC violation" that has existed in PostgreSQL for many years,
+# the EvalPlanQual() mechanism, because that mechanism always starts from a
+# tuple that is visible to the command's MVCC snapshot. This test illustrates
+# a slightly distinct user-visible consequence of the same MVCC violation
+# generally associated with INSERT...ON CONFLICT DO UPDATE. The impact of the
+# MVCC violation goes a little beyond updating MVCC-invisible tuples.
+#
+# With INSERT...ON CONFLICT DO UPDATE, the UPDATE predicate is only evaluated
+# once, on this conclusively-locked tuple, and not any other version of the
+# same tuple. It is therefore possible (in READ COMMITTED mode) that the
+# predicate "fail to be satisfied" according to the command's MVCC snapshot.
+# It might simply be that there is no row version visible, but it's also
+# possible that there is some row version visible, but only as a version that
+# doesn't satisfy the predicate. If, however, the conclusively-locked version
+# satisfies the predicate, that's good enough, and the tuple is updated. The
+# MVCC-snapshot-visible row version is denied the opportunity to prevent the
+# UPDATE from taking place, because we don't walk the UPDATE chain in the usual
+# way.
+
+setup
+{
+ CREATE TABLE colors (key int4 PRIMARY KEY, color text, is_active boolean);
+ INSERT INTO colors (key, color, is_active) VALUES(1, 'Red', false);
+ INSERT INTO colors (key, color, is_active) VALUES(2, 'Green', false);
+ INSERT INTO colors (key, color, is_active) VALUES(3, 'Blue', false);
+}
+
+teardown
+{
+ DROP TABLE colors;
+}
+
+session "s1"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "insert1" {
+ WITH t AS (
+ INSERT INTO colors(key, color, is_active)
+ VALUES(1, 'Brown', true), (2, 'Gray', true)
+ ON CONFLICT (key) DO UPDATE
+ SET color = EXCLUDED.color
+ WHERE colors.is_active)
+ SELECT * FROM colors ORDER BY key;}
+step "select1surprise" { SELECT * FROM colors ORDER BY key; }
+step "c1" { COMMIT; }
+
+session "s2"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "update2" { UPDATE colors SET is_active = true WHERE key = 1; }
+step "c2" { COMMIT; }
+
+# Perhaps surprisingly, the session 1 MVCC-snapshot-visible tuple (the tuple
+# with the pre-populated color 'Red') is denied the opportunity to prevent the
+# UPDATE from taking place -- only the conclusively-locked tuple version
+# matters, and so the tuple with key value 1 was updated to 'Brown' (but not
+# tuple with key value 2, since nothing changed there):
+permutation "update2" "insert1" "c2" "select1surprise" "c1"
diff --git a/src/test/isolation/specs/insert-conflict-do-update.spec b/src/test/isolation/specs/insert-conflict-do-update.spec
new file mode 100644
index 0000000000..5d335a3444
--- /dev/null
+++ b/src/test/isolation/specs/insert-conflict-do-update.spec
@@ -0,0 +1,40 @@
+# INSERT...ON CONFLICT DO UPDATE test
+#
+# This test tries to expose problems with the interaction between concurrent
+# sessions.
+
+setup
+{
+ CREATE TABLE upsert (key int primary key, val text);
+}
+
+teardown
+{
+ DROP TABLE upsert;
+}
+
+session "s1"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "insert1" { INSERT INTO upsert(key, val) VALUES(1, 'insert1') ON CONFLICT (key) DO UPDATE set val = upsert.val || ' updated by insert1'; }
+step "c1" { COMMIT; }
+step "a1" { ABORT; }
+
+session "s2"
+setup
+{
+ BEGIN ISOLATION LEVEL READ COMMITTED;
+}
+step "insert2" { INSERT INTO upsert(key, val) VALUES(1, 'insert2') ON CONFLICT (key) DO UPDATE set val = upsert.val || ' updated by insert2'; }
+step "select2" { SELECT * FROM upsert; }
+step "c2" { COMMIT; }
+step "a2" { ABORT; }
+
+# One session (session 2) block-waits on another (session 1) to determine if it
+# should proceed with an insert or update. Notably, this entails updating a
+# tuple while there is no version of that tuple visible to the updating
+# session's snapshot. This is permitted only in READ COMMITTED mode.
+permutation "insert1" "insert2" "c1" "select2" "c2"
+permutation "insert1" "insert2" "a1" "select2" "c2"
diff --git a/src/test/regress/expected/errors.out b/src/test/regress/expected/errors.out
index 5f8868da26..210e5ff39c 100644
--- a/src/test/regress/expected/errors.out
+++ b/src/test/regress/expected/errors.out
@@ -32,7 +32,9 @@ LINE 1: select nonesuch from pg_database;
^
-- empty distinct list isn't OK
select distinct from pg_database;
-ERROR: SELECT DISTINCT must have at least one column
+ERROR: syntax error at or near "from"
+LINE 1: select distinct from pg_database;
+ ^
-- bad attribute name on lhs of operator
select * from pg_database where nonesuch = pg_database.datname;
ERROR: column "nonesuch" does not exist
diff --git a/src/test/regress/expected/insert_conflict.out b/src/test/regress/expected/insert_conflict.out
new file mode 100644
index 0000000000..3273d98793
--- /dev/null
+++ b/src/test/regress/expected/insert_conflict.out
@@ -0,0 +1,476 @@
+--
+-- insert...on conflict do unique index inference
+--
+create table insertconflicttest(key int4, fruit text);
+--
+-- Test unique index inference with operator class specifications and
+-- named collations
+--
+create unique index op_index_key on insertconflicttest(key, fruit text_pattern_ops);
+create unique index collation_index_key on insertconflicttest(key, fruit collate "C");
+create unique index both_index_key on insertconflicttest(key, fruit collate "C" text_pattern_ops);
+create unique index both_index_expr_key on insertconflicttest(key, lower(fruit) collate "C" text_pattern_ops);
+-- fails
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) do nothing;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit) do nothing;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+-- succeeds
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit) do nothing;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: op_index_key, collation_index_key, both_index_key
+ -> Result
+(4 rows)
+
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit, key) do nothing;
+ QUERY PLAN
+-------------------------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: op_index_key, collation_index_key, both_index_key
+ -> Result
+(4 rows)
+
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit), key, lower(fruit), key) do nothing;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: both_index_expr_key
+ -> Result
+(4 rows)
+
+-- Neither collation nor operator class specifications are required --
+-- supplying them merely *limits* matches to indexes with matching opclasses
+-- used for relevant indexes
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit text_pattern_ops) do nothing;
+ QUERY PLAN
+----------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: op_index_key, both_index_key
+ -> Result
+(4 rows)
+
+-- Okay, arbitrates using both index where text_pattern_ops opclass does and
+-- does not appear.
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit collate "C") do nothing;
+ QUERY PLAN
+-----------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: collation_index_key, both_index_key
+ -> Result
+(4 rows)
+
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit collate "C" text_pattern_ops, key) do nothing;
+ QUERY PLAN
+--------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: both_index_key
+ -> Result
+(4 rows)
+
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified (plus expression variant)
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C", key, key) do nothing;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: both_index_expr_key
+ -> Result
+(4 rows)
+
+-- Attribute appears twice, while not all attributes/expressions on attributes
+-- appearing within index definition match in terms of both opclass and
+-- collation.
+--
+-- Works because every attribute in inference specification needs to be
+-- satisfied once or more by cataloged index attribute, and as always when an
+-- attribute in the cataloged definition has a non-default opclass/collation,
+-- it still satisfied some inference attribute lacking any particular
+-- opclass/collation specification.
+--
+-- The implementation is liberal in accepting inference specifications on the
+-- assumption that multiple inferred unique indexes will prevent problematic
+-- cases. It rolls with unique indexes where attributes redundantly appear
+-- multiple times, too (which is not tested here).
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit text_pattern_ops, key) do nothing;
+ QUERY PLAN
+----------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: op_index_key, both_index_key
+ -> Result
+(4 rows)
+
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C" text_pattern_ops, key, key) do nothing;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: NOTHING
+ Conflict Arbiter Indexes: both_index_expr_key
+ -> Result
+(4 rows)
+
+drop index op_index_key;
+drop index collation_index_key;
+drop index both_index_key;
+drop index both_index_expr_key;
+--
+-- Single key tests
+--
+create unique index key_index on insertconflicttest(key);
+--
+-- Explain tests
+--
+explain (costs off) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit;
+ QUERY PLAN
+---------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: UPDATE
+ Conflict Arbiter Indexes: key_index
+ -> Result
+(4 rows)
+
+-- Should display qual actually attributable to internal sequential scan:
+explain (costs off) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit where insertconflicttest.fruit != 'Cawesh';
+ QUERY PLAN
+-----------------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: UPDATE
+ Conflict Arbiter Indexes: key_index
+ Conflict Filter: (insertconflicttest.fruit <> 'Cawesh'::text)
+ -> Result
+(5 rows)
+
+-- With EXCLUDED.* expression in scan node:
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) do update set fruit = excluded.fruit where excluded.fruit != 'Elderberry';
+ QUERY PLAN
+-----------------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: UPDATE
+ Conflict Arbiter Indexes: key_index
+ Conflict Filter: (excluded.fruit <> 'Elderberry'::text)
+ -> Result
+(5 rows)
+
+-- Does the same, but JSON format shows "Conflict Arbiter Index" as JSON array:
+explain (costs off, format json) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit where insertconflicttest.fruit != 'Lime' returning *;
+ QUERY PLAN
+------------------------------------------------------------------------
+ [ +
+ { +
+ "Plan": { +
+ "Node Type": "ModifyTable", +
+ "Operation": "Insert", +
+ "Relation Name": "insertconflicttest", +
+ "Alias": "insertconflicttest", +
+ "Conflict Resolution": "UPDATE", +
+ "Conflict Arbiter Indexes": ["key_index"], +
+ "Conflict Filter": "(insertconflicttest.fruit <> 'Lime'::text)",+
+ "Plans": [ +
+ { +
+ "Node Type": "Result", +
+ "Parent Relationship": "Member" +
+ } +
+ ] +
+ } +
+ } +
+ ]
+(1 row)
+
+-- Fails (no unique index inference specification, required for do update variant):
+insert into insertconflicttest values (1, 'Apple') on conflict do update set fruit = excluded.fruit;
+ERROR: ON CONFLICT DO UPDATE requires inference specification or constraint name
+LINE 1: ...nsert into insertconflicttest values (1, 'Apple') on conflic...
+ ^
+HINT: For example, ON CONFLICT ON CONFLICT ().
+-- inference succeeds:
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (2, 'Orange') on conflict (key, key, key) do update set fruit = excluded.fruit;
+-- Succeed, since multi-assignment does not involve subquery:
+insert into insertconflicttest
+values (1, 'Apple'), (2, 'Orange')
+on conflict (key) do update set (fruit, key) = (excluded.fruit, excluded.key);
+-- Give good diagnostic message when EXCLUDED.* spuriously referenced from
+-- RETURNING:
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do update set fruit = excluded.fruit RETURNING excluded.fruit;
+ERROR: invalid reference to FROM-clause entry for table "excluded"
+LINE 1: ...y) do update set fruit = excluded.fruit RETURNING excluded.f...
+ ^
+HINT: There is an entry for table "excluded", but it cannot be referenced from this part of the query.
+-- Only suggest
.* column when inference element misspelled:
+insert into insertconflicttest values (1, 'Apple') on conflict (keyy) do update set fruit = excluded.fruit;
+ERROR: column "keyy" does not exist
+LINE 1: ...nsertconflicttest values (1, 'Apple') on conflict (keyy) do ...
+ ^
+HINT: Perhaps you meant to reference the column "insertconflicttest"."key".
+-- Have useful HINT for EXCLUDED.* RTE within UPDATE:
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do update set fruit = excluded.fruitt;
+ERROR: column excluded.fruitt does not exist
+LINE 1: ... 'Apple') on conflict (key) do update set fruit = excluded.f...
+ ^
+HINT: Perhaps you meant to reference the column "excluded"."fruit".
+-- inference fails:
+insert into insertconflicttest values (3, 'Kiwi') on conflict (key, fruit) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (4, 'Mango') on conflict (fruit, key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (5, 'Lemon') on conflict (fruit) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (6, 'Passionfruit') on conflict (lower(fruit)) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+-- Check the target relation can be aliased
+insert into insertconflicttest values (6, 'Passionfruits') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (key) do update set fruit = excluded.fruit; -- ok, no reference to target table
+insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (key) do update set fruit = ict.fruit; -- ok, alias
+insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (key) do update set fruit = insertconflicttest.fruit; -- error, references aliased away name
+ERROR: invalid reference to FROM-clause entry for table "insertconflicttest"
+LINE 1: ...onfruit') on conflict (key) do update set fruit = insertconf...
+ ^
+HINT: Perhaps you meant to reference the table alias "ict".
+drop index key_index;
+--
+-- Composite key tests
+--
+create unique index comp_key_index on insertconflicttest(key, fruit);
+-- inference succeeds:
+insert into insertconflicttest values (7, 'Raspberry') on conflict (key, fruit) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (8, 'Lime') on conflict (fruit, key) do update set fruit = excluded.fruit;
+-- inference fails:
+insert into insertconflicttest values (9, 'Banana') on conflict (key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (10, 'Blueberry') on conflict (key, key, key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (11, 'Cherry') on conflict (key, lower(fruit)) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (12, 'Date') on conflict (lower(fruit), key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+drop index comp_key_index;
+--
+-- Partial index tests, no inference predicate specificied
+--
+create unique index part_comp_key_index on insertconflicttest(key, fruit) where key < 5;
+create unique index expr_part_comp_key_index on insertconflicttest(key, lower(fruit)) where key < 5;
+-- inference fails:
+insert into insertconflicttest values (13, 'Grape') on conflict (key, fruit) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (14, 'Raisin') on conflict (fruit, key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (15, 'Cranberry') on conflict (key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (16, 'Melon') on conflict (key, key, key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (17, 'Mulberry') on conflict (key, lower(fruit)) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (18, 'Pineapple') on conflict (lower(fruit), key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+drop index part_comp_key_index;
+drop index expr_part_comp_key_index;
+--
+-- Expression index tests
+--
+create unique index expr_key_index on insertconflicttest(lower(fruit));
+-- inference succeeds:
+insert into insertconflicttest values (20, 'Quince') on conflict (lower(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (21, 'Pomegranate') on conflict (lower(fruit), lower(fruit)) do update set fruit = excluded.fruit;
+-- inference fails:
+insert into insertconflicttest values (22, 'Apricot') on conflict (upper(fruit)) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (23, 'Blackberry') on conflict (fruit) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+drop index expr_key_index;
+--
+-- Expression index tests (with regular column)
+--
+create unique index expr_comp_key_index on insertconflicttest(key, lower(fruit));
+create unique index tricky_expr_comp_key_index on insertconflicttest(key, lower(fruit), upper(fruit));
+-- inference succeeds:
+insert into insertconflicttest values (24, 'Plum') on conflict (key, lower(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (25, 'Peach') on conflict (lower(fruit), key) do update set fruit = excluded.fruit;
+-- Should not infer "tricky_expr_comp_key_index" index:
+explain (costs off) insert into insertconflicttest values (26, 'Fig') on conflict (lower(fruit), key, lower(fruit), key) do update set fruit = excluded.fruit;
+ QUERY PLAN
+-------------------------------------------------
+ Insert on insertconflicttest
+ Conflict Resolution: UPDATE
+ Conflict Arbiter Indexes: expr_comp_key_index
+ -> Result
+(4 rows)
+
+-- inference fails:
+insert into insertconflicttest values (27, 'Prune') on conflict (key, upper(fruit)) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (28, 'Redcurrant') on conflict (fruit, key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (29, 'Nectarine') on conflict (key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+drop index expr_comp_key_index;
+drop index tricky_expr_comp_key_index;
+--
+-- Non-spurious duplicate violation tests
+--
+create unique index key_index on insertconflicttest(key);
+create unique index fruit_index on insertconflicttest(fruit);
+-- succeeds, since UPDATE happens to update "fruit" to existing value:
+insert into insertconflicttest values (26, 'Fig') on conflict (key) do update set fruit = excluded.fruit;
+-- fails, since UPDATE is to row with key value 26, and we're updating "fruit"
+-- to a value that happens to exist in another row ('peach'):
+insert into insertconflicttest values (26, 'Peach') on conflict (key) do update set fruit = excluded.fruit;
+ERROR: duplicate key value violates unique constraint "fruit_index"
+DETAIL: Key (fruit)=(Peach) already exists.
+-- succeeds, since "key" isn't repeated/referenced in UPDATE, and "fruit"
+-- arbitrates that statement updates existing "Fig" row:
+insert into insertconflicttest values (25, 'Fig') on conflict (fruit) do update set fruit = excluded.fruit;
+drop index key_index;
+drop index fruit_index;
+--
+-- Test partial unique index inference
+--
+create unique index partial_key_index on insertconflicttest(key) where fruit like '%berry';
+-- Succeeds
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) where fruit like '%berry' do update set fruit = excluded.fruit;
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) where fruit like '%berry' and fruit = 'inconsequential' do nothing;
+-- fails
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) where fruit like '%berry' or fruit = 'consequential' do nothing;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+insert into insertconflicttest values (23, 'Blackberry') on conflict (fruit) where fruit like '%berry' do update set fruit = excluded.fruit;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+drop index partial_key_index;
+-- Cleanup
+drop table insertconflicttest;
+-- ******************************************************************
+-- * *
+-- * Test inheritance (example taken from tutorial) *
+-- * *
+-- ******************************************************************
+create table cities (
+ name text,
+ population float8,
+ altitude int -- (in ft)
+);
+create table capitals (
+ state char(2)
+) inherits (cities);
+-- Create unique indexes. Due to a general limitation of inheritance,
+-- uniqueness is only enforced per-relation. Unique index inference
+-- specification will do the right thing, though.
+create unique index cities_names_unique on cities (name);
+create unique index capitals_names_unique on capitals (name);
+-- prepopulate the tables.
+insert into cities values ('San Francisco', 7.24E+5, 63);
+insert into cities values ('Las Vegas', 2.583E+5, 2174);
+insert into cities values ('Mariposa', 1200, 1953);
+insert into capitals values ('Sacramento', 3.694E+5, 30, 'CA');
+insert into capitals values ('Madison', 1.913E+5, 845, 'WI');
+-- Tests proper for inheritance:
+select * from capitals;
+ name | population | altitude | state
+------------+------------+----------+-------
+ Sacramento | 369400 | 30 | CA
+ Madison | 191300 | 845 | WI
+(2 rows)
+
+-- Succeeds:
+insert into cities values ('Las Vegas', 2.583E+5, 2174) on conflict do nothing;
+insert into capitals values ('Sacramento', 4664.E+5, 30, 'CA') on conflict (name) do update set population = excluded.population;
+-- Wrong "Sacramento", so do nothing:
+insert into capitals values ('Sacramento', 50, 2267, 'NE') on conflict (name) do nothing;
+select * from capitals;
+ name | population | altitude | state
+------------+------------+----------+-------
+ Madison | 191300 | 845 | WI
+ Sacramento | 466400000 | 30 | CA
+(2 rows)
+
+insert into cities values ('Las Vegas', 5.83E+5, 2001) on conflict (name) do update set population = excluded.population, altitude = excluded.altitude;
+select tableoid::regclass, * from cities;
+ tableoid | name | population | altitude
+----------+---------------+------------+----------
+ cities | San Francisco | 724000 | 63
+ cities | Mariposa | 1200 | 1953
+ cities | Las Vegas | 583000 | 2001
+ capitals | Madison | 191300 | 845
+ capitals | Sacramento | 466400000 | 30
+(5 rows)
+
+insert into capitals values ('Las Vegas', 5.83E+5, 2222, 'NV') on conflict (name) do update set population = excluded.population;
+-- Capitals will contain new capital, Las Vegas:
+select * from capitals;
+ name | population | altitude | state
+------------+------------+----------+-------
+ Madison | 191300 | 845 | WI
+ Sacramento | 466400000 | 30 | CA
+ Las Vegas | 583000 | 2222 | NV
+(3 rows)
+
+-- Cities contains two instances of "Las Vegas", since unique constraints don't
+-- work across inheritance:
+select tableoid::regclass, * from cities;
+ tableoid | name | population | altitude
+----------+---------------+------------+----------
+ cities | San Francisco | 724000 | 63
+ cities | Mariposa | 1200 | 1953
+ cities | Las Vegas | 583000 | 2001
+ capitals | Madison | 191300 | 845
+ capitals | Sacramento | 466400000 | 30
+ capitals | Las Vegas | 583000 | 2222
+(6 rows)
+
+-- This only affects "cities" version of "Las Vegas":
+insert into cities values ('Las Vegas', 5.86E+5, 2223) on conflict (name) do update set population = excluded.population, altitude = excluded.altitude;
+select tableoid::regclass, * from cities;
+ tableoid | name | population | altitude
+----------+---------------+------------+----------
+ cities | San Francisco | 724000 | 63
+ cities | Mariposa | 1200 | 1953
+ cities | Las Vegas | 586000 | 2223
+ capitals | Madison | 191300 | 845
+ capitals | Sacramento | 466400000 | 30
+ capitals | Las Vegas | 583000 | 2222
+(6 rows)
+
+-- clean up
+drop table capitals;
+drop table cities;
+-- Make sure a table named excluded is handled properly
+create table excluded(key int primary key, data text);
+insert into excluded values(1, '1');
+-- error, ambiguous
+insert into excluded values(1, '2') on conflict (key) do update set data = excluded.data RETURNING *;
+ERROR: table reference "excluded" is ambiguous
+LINE 1: ...es(1, '2') on conflict (key) do update set data = excluded.d...
+ ^
+-- ok, aliased
+insert into excluded AS target values(1, '2') on conflict (key) do update set data = excluded.data RETURNING *;
+ key | data
+-----+------
+ 1 | 2
+(1 row)
+
+-- ok, aliased
+insert into excluded AS target values(1, '2') on conflict (key) do update set data = target.data RETURNING *;
+ key | data
+-----+------
+ 1 | 2
+(1 row)
+
+-- make sure excluded isn't a problem in returning clause
+insert into excluded values(1, '2') on conflict (key) do update set data = 3 RETURNING excluded.*;
+ key | data
+-----+------
+ 1 | 3
+(1 row)
+
+-- clean up
+drop table excluded;
diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
index 0db1df3040..64a93309eb 100644
--- a/src/test/regress/expected/privileges.out
+++ b/src/test/regress/expected/privileges.out
@@ -269,7 +269,7 @@ SELECT * FROM atestv2; -- fail (even though regressuser2 can access underlying a
ERROR: permission denied for relation atest2
-- Test column level permissions
SET SESSION AUTHORIZATION regressuser1;
-CREATE TABLE atest5 (one int, two int, three int);
+CREATE TABLE atest5 (one int, two int unique, three int, four int unique);
CREATE TABLE atest6 (one int, two int, blue int);
GRANT SELECT (one), INSERT (two), UPDATE (three) ON atest5 TO regressuser4;
GRANT ALL (one) ON atest5 TO regressuser3;
@@ -367,6 +367,33 @@ UPDATE atest5 SET one = 8; -- fail
ERROR: permission denied for relation atest5
UPDATE atest5 SET three = 5, one = 2; -- fail
ERROR: permission denied for relation atest5
+-- Check that column level privs are enforced in RETURNING
+-- Ok.
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = 10;
+-- Error. No SELECT on column three.
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = 10 RETURNING atest5.three;
+ERROR: permission denied for relation atest5
+-- Ok. May SELECT on column "one":
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = 10 RETURNING atest5.one;
+ one
+-----
+
+(1 row)
+
+-- Check that column level privileges are enforced for EXCLUDED
+-- Ok. we may select one
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLUDED.one;
+-- Error. No select rights on three
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLUDED.three;
+ERROR: permission denied for relation atest5
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set one = 8; -- fails (due to UPDATE)
+ERROR: permission denied for relation atest5
+INSERT INTO atest5(three) VALUES (4) ON CONFLICT (two) DO UPDATE set three = 10; -- fails (due to INSERT)
+ERROR: permission denied for relation atest5
+-- Check that the the columns in the inference require select privileges
+-- Error. No privs on four
+INSERT INTO atest5(three) VALUES (4) ON CONFLICT (four) DO UPDATE set three = 10;
+ERROR: permission denied for relation atest5
SET SESSION AUTHORIZATION regressuser1;
REVOKE ALL (one) ON atest5 FROM regressuser4;
GRANT SELECT (one,two,blue) ON atest6 TO regressuser4;
diff --git a/src/test/regress/expected/returning.out b/src/test/regress/expected/returning.out
index 69bdacc103..cb51bb8687 100644
--- a/src/test/regress/expected/returning.out
+++ b/src/test/regress/expected/returning.out
@@ -331,3 +331,27 @@ SELECT * FROM voo;
17 | zoo2
(2 rows)
+-- Check aliased target relation
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING *; -- ok
+ f1 | f2 | f3 | f4
+----+----+----+----
+ 4 | | 42 | 99
+(1 row)
+
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING foo.*; -- fails, wrong name
+ERROR: invalid reference to FROM-clause entry for table "foo"
+LINE 1: INSERT INTO foo AS bar DEFAULT VALUES RETURNING foo.*;
+ ^
+HINT: Perhaps you meant to reference the table alias "bar".
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING bar.*; -- ok
+ f1 | f2 | f3 | f4
+----+----+----+----
+ 5 | | 42 | 99
+(1 row)
+
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING bar.f3; -- ok
+ f3
+----
+ 42
+(1 row)
+
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
index ad93632174..82bc47895a 100644
--- a/src/test/regress/expected/rowsecurity.out
+++ b/src/test/regress/expected/rowsecurity.out
@@ -1490,6 +1490,138 @@ SELECT * FROM b1;
4 | yyy
(21 rows)
+--
+-- INSERT ... ON CONFLICT DO UPDATE and Row-level security
+--
+SET SESSION AUTHORIZATION rls_regress_user0;
+DROP POLICY p1 ON document;
+CREATE POLICY p1 ON document FOR SELECT USING (true);
+CREATE POLICY p2 ON document FOR INSERT WITH CHECK (dauthor = current_user);
+CREATE POLICY p3 ON document FOR UPDATE
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'))
+ WITH CHECK (dauthor = current_user);
+SET SESSION AUTHORIZATION rls_regress_user1;
+-- Exists...
+SELECT * FROM document WHERE did = 2;
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-----------------
+ 2 | 11 | 2 | rls_regress_user1 | my second novel
+(1 row)
+
+-- ...so violates actual WITH CHECK OPTION within UPDATE (not INSERT, since
+-- alternative UPDATE path happens to be taken):
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user2', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, dauthor = EXCLUDED.dauthor;
+ERROR: new row violates row level security policy for "document"
+-- Violates USING qual for UPDATE policy p3.
+--
+-- UPDATE path is taken, but UPDATE fails purely because *existing* row to be
+-- updated is not a "novel"/cid 11 (row is not leaked, even though we have
+-- SELECT privileges sufficient to see the row in this instance):
+INSERT INTO document VALUES (33, 22, 1, 'rls_regress_user1', 'okay science fiction'); -- preparation for next statement
+INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'Some novel, replaces sci-fi') -- takes UPDATE path
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle;
+ERROR: new row violates row level security policy (USING expression) for "document"
+-- Fine (we UPDATE, since INSERT WCOs and UPDATE security barrier quals + WCOs
+-- not violated):
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+----------------
+ 2 | 11 | 2 | rls_regress_user1 | my first novel
+(1 row)
+
+-- Fine (we INSERT, so "cid = 33" ("technology") isn't evaluated):
+INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'some technology novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *;
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-----------------------
+ 78 | 11 | 1 | rls_regress_user1 | some technology novel
+(1 row)
+
+-- Fine (same query, but we UPDATE, so "cid = 33", ("technology") is not the
+-- case in respect of *existing* tuple):
+INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'some technology novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *;
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+-----------------------
+ 78 | 33 | 1 | rls_regress_user1 | some technology novel
+(1 row)
+
+-- Same query a third time, but now fails due to existing tuple finally not
+-- passing quals:
+INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'some technology novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *;
+ERROR: new row violates row level security policy (USING expression) for "document"
+-- Don't fail just because INSERT doesn't satisfy WITH CHECK option that
+-- originated as a barrier/USING() qual from the UPDATE. Note that the UPDATE
+-- path *isn't* taken, and so UPDATE-related policy does not apply:
+INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'technology book, can only insert')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+ did | cid | dlevel | dauthor | dtitle
+-----+-----+--------+-------------------+----------------------------------
+ 79 | 33 | 1 | rls_regress_user1 | technology book, can only insert
+(1 row)
+
+-- But this time, the same statement fails, because the UPDATE path is taken,
+-- and updating the row just inserted falls afoul of security barrier qual
+-- (enforced as WCO) -- what we might have updated target tuple to is
+-- irrelevant, in fact.
+INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'technology book, can only insert')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+ERROR: new row violates row level security policy (USING expression) for "document"
+-- Test default USING qual enforced as WCO
+SET SESSION AUTHORIZATION rls_regress_user0;
+DROP POLICY p1 ON document;
+DROP POLICY p2 ON document;
+DROP POLICY p3 ON document;
+CREATE POLICY p3_with_default ON document FOR UPDATE
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'));
+SET SESSION AUTHORIZATION rls_regress_user1;
+-- Just because WCO-style enforcement of USING quals occurs with
+-- existing/target tuple does not mean that the implementation can be allowed
+-- to fail to also enforce this qual against the final tuple appended to
+-- relation (since in the absence of an explicit WCO, this is also interpreted
+-- as an UPDATE/ALL WCO in general).
+--
+-- UPDATE path is taken here (fails due to existing tuple). Note that this is
+-- not reported as a "USING expression", because it's an RLS UPDATE check that originated as
+-- a USING qual for the purposes of RLS in general, as opposed to an explicit
+-- USING qual that is ordinarily a security barrier. We leave it up to the
+-- UPDATE to make this fail:
+INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'technology book, can only insert')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+ERROR: new row violates row level security policy for "document"
+-- UPDATE path is taken here. Existing tuple passes, since it's cid
+-- corresponds to "novel", but default USING qual is enforced against
+-- post-UPDATE tuple too (as always when updating with a policy that lacks an
+-- explicit WCO), and so this fails:
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET cid = EXCLUDED.cid, dtitle = EXCLUDED.dtitle RETURNING *;
+ERROR: new row violates row level security policy for "document"
+SET SESSION AUTHORIZATION rls_regress_user0;
+DROP POLICY p3_with_default ON document;
+--
+-- Test ALL policies with ON CONFLICT DO UPDATE (much the same as existing UPDATE
+-- tests)
+--
+CREATE POLICY p3_with_all ON document FOR ALL
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'))
+ WITH CHECK (dauthor = current_user);
+SET SESSION AUTHORIZATION rls_regress_user1;
+-- Fails, since ALL WCO is enforced in insert path:
+INSERT INTO document VALUES (80, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user2', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33;
+ERROR: new row violates row level security policy for "document"
+-- Fails, since ALL policy USING qual is enforced (existing, target tuple is in
+-- violation, since it has the "manga" cid):
+INSERT INTO document VALUES (4, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle;
+ERROR: new row violates row level security policy (USING expression) for "document"
+-- Fails, since ALL WCO are enforced:
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dauthor = 'rls_regress_user2';
+ERROR: new row violates row level security policy for "document"
--
-- ROLE/GROUP
--
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index f7f016be21..2df24c0de1 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1123,6 +1123,10 @@ SELECT * FROM shoelace_log ORDER BY sl_name;
SELECT * FROM shoelace_obsolete WHERE sl_avail = 0;
insert into shoelace values ('sl9', 0, 'pink', 35.0, 'inch', 0.0);
insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0);
+-- Unsupported (even though a similar updatable view construct is)
+insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0)
+ on conflict do nothing;
+ERROR: INSERT with ON CONFLICT clause cannot be used with table that has INSERT or UPDATE rules
SELECT * FROM shoelace_obsolete ORDER BY sl_len_cm;
sl_name | sl_avail | sl_color | sl_len | sl_unit | sl_len_cm
------------+----------+------------+--------+----------+-----------
@@ -2362,6 +2366,22 @@ DETAIL: Key (id3a, id3c)=(1, 13) is not present in table "rule_and_refint_t2".
insert into rule_and_refint_t3 values (1, 13, 11, 'row6');
ERROR: insert or update on table "rule_and_refint_t3" violates foreign key constraint "rule_and_refint_t3_id3a_fkey"
DETAIL: Key (id3a, id3b)=(1, 13) is not present in table "rule_and_refint_t1".
+-- Ordinary table
+insert into rule_and_refint_t3 values (1, 13, 11, 'row6')
+ on conflict do nothing;
+ERROR: insert or update on table "rule_and_refint_t3" violates foreign key constraint "rule_and_refint_t3_id3a_fkey"
+DETAIL: Key (id3a, id3b)=(1, 13) is not present in table "rule_and_refint_t1".
+-- rule not fired, so fk violation
+insert into rule_and_refint_t3 values (1, 13, 11, 'row6')
+ on conflict (id3a, id3b, id3c) do update
+ set id3b = excluded.id3b;
+ERROR: insert or update on table "rule_and_refint_t3" violates foreign key constraint "rule_and_refint_t3_id3a_fkey"
+DETAIL: Key (id3a, id3b)=(1, 13) is not present in table "rule_and_refint_t1".
+-- rule fired, so unsupported
+insert into shoelace values ('sl9', 0, 'pink', 35.0, 'inch', 0.0)
+ on conflict (sl_name) do update
+ set sl_avail = excluded.sl_avail;
+ERROR: INSERT with ON CONFLICT clause cannot be used with table that has INSERT or UPDATE rules
create rule rule_and_refint_t3_ins as on insert to rule_and_refint_t3
where (exists (select 1 from rule_and_refint_t3
where (((rule_and_refint_t3.id3a = new.id3a)
@@ -2743,3 +2763,73 @@ View definition:
FROM ( VALUES (1,2)) v(q, w);
drop view rule_v1;
+--
+-- Check DO INSTEAD rules with ON CONFLICT
+--
+CREATE TABLE hats (
+ hat_name char(10) primary key,
+ hat_color char(10) -- hat color
+);
+CREATE TABLE hat_data (
+ hat_name char(10) primary key,
+ hat_color char(10) -- hat color
+);
+-- okay
+CREATE RULE hat_nosert AS ON INSERT TO hats
+ DO INSTEAD
+ INSERT INTO hat_data VALUES (
+ NEW.hat_name,
+ NEW.hat_color)
+ ON CONFLICT (hat_name) DO NOTHING RETURNING *;
+-- Works (projects row)
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+ hat_name | hat_color
+------------+------------
+ h7 | black
+(1 row)
+
+-- Works (does nothing)
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+ hat_name | hat_color
+----------+-----------
+(0 rows)
+
+SELECT tablename, rulename, definition FROM pg_rules
+ WHERE tablename = 'hats';
+ tablename | rulename | definition
+-----------+------------+------------------------------------------------------------------------------
+ hats | hat_nosert | CREATE RULE hat_nosert AS +
+ | | ON INSERT TO hats DO INSTEAD INSERT INTO hat_data (hat_name, hat_color)+
+ | | VALUES (new.hat_name, new.hat_color) ON CONFLICT DO NOTHING +
+ | | RETURNING hat_data.hat_name, +
+ | | hat_data.hat_color;
+(1 row)
+
+DROP RULE hat_nosert ON hats;
+CREATE RULE hat_upsert AS ON INSERT TO hats
+ DO INSTEAD
+ INSERT INTO hat_data VALUES (
+ NEW.hat_name,
+ NEW.hat_color)
+ ON CONFLICT (hat_name) DO UPDATE SET hat_color = 'Orange' RETURNING *;
+-- Works (does upsert)
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+ hat_name | hat_color
+------------+------------
+ h7 | Orange
+(1 row)
+
+SELECT tablename, rulename, definition FROM pg_rules
+ WHERE tablename = 'hats';
+ tablename | rulename | definition
+-----------+------------+-----------------------------------------------------------------------------------------------
+ hats | hat_upsert | CREATE RULE hat_upsert AS +
+ | | ON INSERT TO hats DO INSTEAD INSERT INTO hat_data (hat_name, hat_color) +
+ | | VALUES (new.hat_name, new.hat_color) ON CONFLICT DO UPDATE SET hat_color = 'Orange'::bpchar+
+ | | RETURNING hat_data.hat_name, +
+ | | hat_data.hat_color;
+(1 row)
+
+DROP RULE hat_upsert ON hats;
+drop table hats;
+drop table hat_data;
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index b14410fd22..de64ca7ec7 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -638,6 +638,28 @@ from
-----
(0 rows)
+--
+-- Test case for subselect within UPDATE of INSERT...ON CONFLICT DO UPDATE
+--
+create temp table upsert(key int4 primary key, val text);
+insert into upsert values(1, 'val') on conflict (key) do update set val = 'not seen';
+insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 limit 1)::text;
+select * from upsert;
+ key | val
+-----+----------------------------
+ 1 | seen with subselect 123456
+(1 row)
+
+with aa as (select 'int4_tbl' u from int4_tbl limit 1)
+insert into upsert values (1, 'x'), (999, 'y')
+on conflict (key) do update set val = (select u from aa)
+returning *;
+ key | val
+-----+----------
+ 1 | int4_tbl
+ 999 | y
+(2 rows)
+
--
-- Test case for cross-type partial matching in hashed subplan (bug #7597)
--
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index f1a5fde107..3b32e8fdfe 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -274,7 +274,7 @@ drop sequence ttdummy_seq;
-- tests for per-statement triggers
--
CREATE TABLE log_table (tstamp timestamp default timeofday()::timestamp);
-CREATE TABLE main_table (a int, b int);
+CREATE TABLE main_table (a int unique, b int);
COPY main_table (a,b) FROM stdin;
CREATE FUNCTION trigger_func() RETURNS trigger LANGUAGE plpgsql AS '
BEGIN
@@ -291,6 +291,14 @@ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func('after_ins_stmt');
--
CREATE TRIGGER after_upd_stmt_trig AFTER UPDATE ON main_table
EXECUTE PROCEDURE trigger_func('after_upd_stmt');
+-- Both insert and update statement level triggers (before and after) should
+-- fire. Doesn't fire UPDATE before trigger, but only because one isn't
+-- defined.
+INSERT INTO main_table (a, b) VALUES (5, 10) ON CONFLICT (a)
+ DO UPDATE SET b = EXCLUDED.b;
+NOTICE: trigger_func(before_ins_stmt) called: action = INSERT, when = BEFORE, level = STATEMENT
+NOTICE: trigger_func(after_upd_stmt) called: action = UPDATE, when = AFTER, level = STATEMENT
+NOTICE: trigger_func(after_ins_stmt) called: action = INSERT, when = AFTER, level = STATEMENT
CREATE TRIGGER after_upd_row_trig AFTER UPDATE ON main_table
FOR EACH ROW EXECUTE PROCEDURE trigger_func('after_upd_row');
INSERT INTO main_table DEFAULT VALUES;
@@ -305,6 +313,8 @@ NOTICE: trigger_func(after_upd_stmt) called: action = UPDATE, when = AFTER, lev
-- UPDATE that effects zero rows should still call per-statement trigger
UPDATE main_table SET a = a + 2 WHERE b > 100;
NOTICE: trigger_func(after_upd_stmt) called: action = UPDATE, when = AFTER, level = STATEMENT
+-- constraint now unneeded
+ALTER TABLE main_table DROP CONSTRAINT main_table_a_key;
-- COPY should fire per-row and per-statement INSERT triggers
COPY main_table (a, b) FROM stdin;
NOTICE: trigger_func(before_ins_stmt) called: action = INSERT, when = BEFORE, level = STATEMENT
@@ -1731,3 +1741,93 @@ select * from self_ref_trigger;
drop table self_ref_trigger;
drop function self_ref_trigger_ins_func();
drop function self_ref_trigger_del_func();
+--
+-- Verify behavior of before and after triggers with INSERT...ON CONFLICT
+-- DO UPDATE
+--
+create table upsert (key int4 primary key, color text);
+create function upsert_before_func()
+ returns trigger language plpgsql as
+$$
+begin
+ if (TG_OP = 'UPDATE') then
+ raise warning 'before update (old): %', old.*::text;
+ raise warning 'before update (new): %', new.*::text;
+ elsif (TG_OP = 'INSERT') then
+ raise warning 'before insert (new): %', new.*::text;
+ if new.key % 2 = 0 then
+ new.key := new.key + 1;
+ new.color := new.color || ' trig modified';
+ raise warning 'before insert (new, modified): %', new.*::text;
+ end if;
+ end if;
+ return new;
+end;
+$$;
+create trigger upsert_before_trig before insert or update on upsert
+ for each row execute procedure upsert_before_func();
+create function upsert_after_func()
+ returns trigger language plpgsql as
+$$
+begin
+ if (TG_OP = 'UPDATE') then
+ raise warning 'after update (old): %', new.*::text;
+ raise warning 'after update (new): %', new.*::text;
+ elsif (TG_OP = 'INSERT') then
+ raise warning 'after insert (new): %', new.*::text;
+ end if;
+ return null;
+end;
+$$;
+create trigger upsert_after_trig after insert or update on upsert
+ for each row execute procedure upsert_after_func();
+insert into upsert values(1, 'black') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (1,black)
+WARNING: after insert (new): (1,black)
+insert into upsert values(2, 'red') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (2,red)
+WARNING: before insert (new, modified): (3,"red trig modified")
+WARNING: after insert (new): (3,"red trig modified")
+insert into upsert values(3, 'orange') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (3,orange)
+WARNING: before update (old): (3,"red trig modified")
+WARNING: before update (new): (3,"updated red trig modified")
+WARNING: after update (old): (3,"updated red trig modified")
+WARNING: after update (new): (3,"updated red trig modified")
+insert into upsert values(4, 'green') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (4,green)
+WARNING: before insert (new, modified): (5,"green trig modified")
+WARNING: after insert (new): (5,"green trig modified")
+insert into upsert values(5, 'purple') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (5,purple)
+WARNING: before update (old): (5,"green trig modified")
+WARNING: before update (new): (5,"updated green trig modified")
+WARNING: after update (old): (5,"updated green trig modified")
+WARNING: after update (new): (5,"updated green trig modified")
+insert into upsert values(6, 'white') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (6,white)
+WARNING: before insert (new, modified): (7,"white trig modified")
+WARNING: after insert (new): (7,"white trig modified")
+insert into upsert values(7, 'pink') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (7,pink)
+WARNING: before update (old): (7,"white trig modified")
+WARNING: before update (new): (7,"updated white trig modified")
+WARNING: after update (old): (7,"updated white trig modified")
+WARNING: after update (new): (7,"updated white trig modified")
+insert into upsert values(8, 'yellow') on conflict (key) do update set color = 'updated ' || upsert.color;
+WARNING: before insert (new): (8,yellow)
+WARNING: before insert (new, modified): (9,"yellow trig modified")
+WARNING: after insert (new): (9,"yellow trig modified")
+select * from upsert;
+ key | color
+-----+-----------------------------
+ 1 | black
+ 3 | updated red trig modified
+ 5 | updated green trig modified
+ 7 | updated white trig modified
+ 9 | yellow trig modified
+(5 rows)
+
+drop table upsert;
+drop function upsert_before_func();
+drop function upsert_after_func();
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index ccabe9e3dc..7eb92612d7 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -215,6 +215,67 @@ INSERT INTO rw_view15 VALUES (3, 'ROW 3'); -- should fail
ERROR: cannot insert into column "upper" of view "rw_view15"
DETAIL: View columns that are not columns of their base relation are not updatable.
INSERT INTO rw_view15 (a) VALUES (3); -- should be OK
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT DO NOTHING; -- succeeds
+SELECT * FROM rw_view15;
+ a | upper
+----+-------------
+ -2 | ROW -2
+ -1 | ROW -1
+ 0 | ROW 0
+ 1 | ROW 1
+ 2 | ROW 2
+ 3 | UNSPECIFIED
+(6 rows)
+
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO NOTHING; -- succeeds
+SELECT * FROM rw_view15;
+ a | upper
+----+-------------
+ -2 | ROW -2
+ -1 | ROW -1
+ 0 | ROW 0
+ 1 | ROW 1
+ 2 | ROW 2
+ 3 | UNSPECIFIED
+(6 rows)
+
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO UPDATE set a = excluded.a; -- succeeds
+SELECT * FROM rw_view15;
+ a | upper
+----+-------------
+ -2 | ROW -2
+ -1 | ROW -1
+ 0 | ROW 0
+ 1 | ROW 1
+ 2 | ROW 2
+ 3 | UNSPECIFIED
+(6 rows)
+
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO UPDATE set upper = 'blarg'; -- fails
+ERROR: cannot insert into column "upper" of view "rw_view15"
+DETAIL: View columns that are not columns of their base relation are not updatable.
+SELECT * FROM rw_view15;
+ a | upper
+----+-------------
+ -2 | ROW -2
+ -1 | ROW -1
+ 0 | ROW 0
+ 1 | ROW 1
+ 2 | ROW 2
+ 3 | UNSPECIFIED
+(6 rows)
+
+SELECT * FROM rw_view15;
+ a | upper
+----+-------------
+ -2 | ROW -2
+ -1 | ROW -1
+ 0 | ROW 0
+ 1 | ROW 1
+ 2 | ROW 2
+ 3 | UNSPECIFIED
+(6 rows)
+
ALTER VIEW rw_view15 ALTER COLUMN upper SET DEFAULT 'NOT SET';
INSERT INTO rw_view15 (a) VALUES (4); -- should fail
ERROR: cannot insert into column "upper" of view "rw_view15"
diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out
index 1de2a867a8..adc1fd7c39 100644
--- a/src/test/regress/expected/update.out
+++ b/src/test/regress/expected/update.out
@@ -6,6 +6,10 @@ CREATE TABLE update_test (
b INT,
c TEXT
);
+CREATE TABLE upsert_test (
+ a INT PRIMARY KEY,
+ b TEXT
+);
INSERT INTO update_test VALUES (5, 10, 'foo');
INSERT INTO update_test(b, a) VALUES (15, 10);
SELECT * FROM update_test;
@@ -147,4 +151,34 @@ SELECT a, b, char_length(c) FROM update_test;
42 | 12 | 10000
(4 rows)
+-- Test ON CONFLICT DO UPDATE
+INSERT INTO upsert_test VALUES(1, 'Boo');
+-- uncorrelated sub-select:
+WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test
+ VALUES (1, 'Bar') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *;
+ a | b
+---+-----
+ 1 | Foo
+(1 row)
+
+-- correlated sub-select:
+INSERT INTO upsert_test VALUES (1, 'Baz') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Correlated', a from upsert_test i WHERE i.a = upsert_test.a)
+ RETURNING *;
+ a | b
+---+-----------------
+ 1 | Foo, Correlated
+(1 row)
+
+-- correlated sub-select (EXCLUDED.* alias):
+INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING *;
+ a | b
+---+---------------------------
+ 1 | Foo, Correlated, Excluded
+(1 row)
+
DROP TABLE update_test;
+DROP TABLE upsert_test;
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index a31ec341e6..2c9226c3db 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -1806,6 +1806,88 @@ SELECT * FROM y;
-400
(22 rows)
+-- data-modifying WITH containing INSERT...ON CONFLICT DO UPDATE
+CREATE TABLE z AS SELECT i AS k, (i || ' v')::text v FROM generate_series(1, 16, 3) i;
+ALTER TABLE z ADD UNIQUE (k);
+WITH t AS (
+ INSERT INTO z SELECT i, 'insert'
+ FROM generate_series(0, 16) i
+ ON CONFLICT (k) DO UPDATE SET v = z.v || ', now update'
+ RETURNING *
+)
+SELECT * FROM t JOIN y ON t.k = y.a ORDER BY a, k;
+ k | v | a
+---+--------+---
+ 0 | insert | 0
+ 0 | insert | 0
+(2 rows)
+
+-- Test EXCLUDED.* reference within CTE
+WITH aa AS (
+ INSERT INTO z VALUES(1, 5) ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v
+ WHERE z.k != EXCLUDED.k
+ RETURNING *
+)
+SELECT * FROM aa;
+ k | v
+---+---
+(0 rows)
+
+-- New query/snapshot demonstrates side-effects of previous query.
+SELECT * FROM z ORDER BY k;
+ k | v
+----+------------------
+ 0 | insert
+ 1 | 1 v, now update
+ 2 | insert
+ 3 | insert
+ 4 | 4 v, now update
+ 5 | insert
+ 6 | insert
+ 7 | 7 v, now update
+ 8 | insert
+ 9 | insert
+ 10 | 10 v, now update
+ 11 | insert
+ 12 | insert
+ 13 | 13 v, now update
+ 14 | insert
+ 15 | insert
+ 16 | 16 v, now update
+(17 rows)
+
+--
+-- Ensure subqueries within the update clause work, even if they
+-- reference outside values
+--
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 1 LIMIT 1);
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = ' update' WHERE z.k = (SELECT a FROM aa);
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 1 LIMIT 1);
+WITH aa AS (SELECT 'a' a, 'b' b UNION ALL SELECT 'a' a, 'b' b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 'a' LIMIT 1);
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, (SELECT b || ' insert' FROM aa WHERE a = 1 ))
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 1 LIMIT 1);
+-- This shows an attempt to update an invisible row, which should really be
+-- reported as a cardinality violation, but it doesn't seem worth fixing:
+WITH simpletup AS (
+ SELECT 2 k, 'Green' v),
+upsert_cte AS (
+ INSERT INTO z VALUES(2, 'Blue') ON CONFLICT (k) DO
+ UPDATE SET (k, v) = (SELECT k, v FROM simpletup WHERE simpletup.k = z.k)
+ RETURNING k, v)
+INSERT INTO z VALUES(2, 'Red') ON CONFLICT (k) DO
+UPDATE SET (k, v) = (SELECT k, v FROM upsert_cte WHERE upsert_cte.k = z.k)
+RETURNING k, v;
+ERROR: attempted to update invisible tuple
+DROP TABLE z;
-- check that run to completion happens in proper ordering
TRUNCATE TABLE y;
INSERT INTO y SELECT generate_series(1, 3);
diff --git a/src/test/regress/input/constraints.source b/src/test/regress/input/constraints.source
index c16f65088a..7647544f9b 100644
--- a/src/test/regress/input/constraints.source
+++ b/src/test/regress/input/constraints.source
@@ -292,6 +292,11 @@ INSERT INTO UNIQUE_TBL VALUES (5, 'one');
INSERT INTO UNIQUE_TBL (t) VALUES ('six');
INSERT INTO UNIQUE_TBL (t) VALUES ('seven');
+INSERT INTO UNIQUE_TBL VALUES (5, 'five-upsert-insert') ON CONFLICT (i) DO UPDATE SET t = 'five-upsert-update';
+INSERT INTO UNIQUE_TBL VALUES (6, 'six-upsert-insert') ON CONFLICT (i) DO UPDATE SET t = 'six-upsert-update';
+-- should fail
+INSERT INTO UNIQUE_TBL VALUES (1, 'a'), (2, 'b'), (2, 'b') ON CONFLICT (i) DO UPDATE SET t = 'fails';
+
SELECT '' AS five, * FROM UNIQUE_TBL;
DROP TABLE UNIQUE_TBL;
@@ -438,6 +443,12 @@ INSERT INTO circles VALUES('<(0,0), 5>', '<(0,0), 4>');
INSERT INTO circles VALUES('<(10,10), 10>', '<(0,0), 5>');
-- fail, overlaps
INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>');
+-- succeed, because violation is ignored
+INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
+ ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO NOTHING;
+-- fail, because DO UPDATE variant requires unique index
+INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
+ ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO UPDATE SET c2 = EXCLUDED.c2;
-- succeed because c1 doesn't overlap
INSERT INTO circles VALUES('<(20,20), 1>', '<(0,0), 5>');
-- succeed because c2 doesn't overlap
@@ -462,6 +473,7 @@ CREATE TABLE deferred_excl (
INSERT INTO deferred_excl VALUES(1);
INSERT INTO deferred_excl VALUES(2);
INSERT INTO deferred_excl VALUES(1); -- fail
+INSERT INTO deferred_excl VALUES(1) ON CONFLICT ON CONSTRAINT deferred_excl_con DO NOTHING; -- fail
BEGIN;
INSERT INTO deferred_excl VALUES(2); -- no fail here
COMMIT; -- should fail here
diff --git a/src/test/regress/output/constraints.source b/src/test/regress/output/constraints.source
index d3ec233313..bbe4ed1976 100644
--- a/src/test/regress/output/constraints.source
+++ b/src/test/regress/output/constraints.source
@@ -421,16 +421,23 @@ INSERT INTO UNIQUE_TBL VALUES (4, 'four');
INSERT INTO UNIQUE_TBL VALUES (5, 'one');
INSERT INTO UNIQUE_TBL (t) VALUES ('six');
INSERT INTO UNIQUE_TBL (t) VALUES ('seven');
+INSERT INTO UNIQUE_TBL VALUES (5, 'five-upsert-insert') ON CONFLICT (i) DO UPDATE SET t = 'five-upsert-update';
+INSERT INTO UNIQUE_TBL VALUES (6, 'six-upsert-insert') ON CONFLICT (i) DO UPDATE SET t = 'six-upsert-update';
+-- should fail
+INSERT INTO UNIQUE_TBL VALUES (1, 'a'), (2, 'b'), (2, 'b') ON CONFLICT (i) DO UPDATE SET t = 'fails';
+ERROR: ON CONFLICT DO UPDATE command cannot affect row a second time
+HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
SELECT '' AS five, * FROM UNIQUE_TBL;
- five | i | t
-------+---+-------
+ five | i | t
+------+---+--------------------
| 1 | one
| 2 | two
| 4 | four
- | 5 | one
| | six
| | seven
-(6 rows)
+ | 5 | five-upsert-update
+ | 6 | six-upsert-insert
+(7 rows)
DROP TABLE UNIQUE_TBL;
CREATE TABLE UNIQUE_TBL (i int, t text,
@@ -605,6 +612,13 @@ INSERT INTO circles VALUES('<(10,10), 10>', '<(0,0), 5>');
INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>');
ERROR: conflicting key value violates exclusion constraint "circles_c1_c2_excl"
DETAIL: Key (c1, (c2::circle))=(<(20,20),10>, <(0,0),4>) conflicts with existing key (c1, (c2::circle))=(<(10,10),10>, <(0,0),5>).
+-- succeed, because violation is ignored
+INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
+ ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO NOTHING;
+-- fail, because DO UPDATE variant requires unique index
+INSERT INTO circles VALUES('<(20,20), 10>', '<(0,0), 4>')
+ ON CONFLICT ON CONSTRAINT circles_c1_c2_excl DO UPDATE SET c2 = EXCLUDED.c2;
+ERROR: ON CONFLICT DO UPDATE not supported with exclusion constraints
-- succeed because c1 doesn't overlap
INSERT INTO circles VALUES('<(20,20), 1>', '<(0,0), 5>');
-- succeed because c2 doesn't overlap
@@ -627,6 +641,8 @@ INSERT INTO deferred_excl VALUES(2);
INSERT INTO deferred_excl VALUES(1); -- fail
ERROR: conflicting key value violates exclusion constraint "deferred_excl_con"
DETAIL: Key (f1)=(1) conflicts with existing key (f1)=(1).
+INSERT INTO deferred_excl VALUES(1) ON CONFLICT ON CONSTRAINT deferred_excl_con DO NOTHING; -- fail
+ERROR: ON CONFLICT does not support deferred unique constraints/exclusion constraints as arbiters
BEGIN;
INSERT INTO deferred_excl VALUES(2); -- no fail here
COMMIT; -- should fail here
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 6d3b865351..b0ebb6b3f4 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -36,6 +36,7 @@ test: geometry horology regex oidjoins type_sanity opr_sanity
# These four each depend on the previous one
# ----------
test: insert
+test: insert_conflict
test: create_function_1
test: create_type
test: create_table
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 8326894ed9..8409c0f3ef 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -50,6 +50,7 @@ test: oidjoins
test: type_sanity
test: opr_sanity
test: insert
+test: insert_conflict
test: create_function_1
test: create_type
test: create_table
diff --git a/src/test/regress/sql/insert_conflict.sql b/src/test/regress/sql/insert_conflict.sql
new file mode 100644
index 0000000000..ba2b66bdb6
--- /dev/null
+++ b/src/test/regress/sql/insert_conflict.sql
@@ -0,0 +1,284 @@
+--
+-- insert...on conflict do unique index inference
+--
+create table insertconflicttest(key int4, fruit text);
+
+--
+-- Test unique index inference with operator class specifications and
+-- named collations
+--
+create unique index op_index_key on insertconflicttest(key, fruit text_pattern_ops);
+create unique index collation_index_key on insertconflicttest(key, fruit collate "C");
+create unique index both_index_key on insertconflicttest(key, fruit collate "C" text_pattern_ops);
+create unique index both_index_expr_key on insertconflicttest(key, lower(fruit) collate "C" text_pattern_ops);
+
+-- fails
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) do nothing;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit) do nothing;
+
+-- succeeds
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit) do nothing;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit, key) do nothing;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit), key, lower(fruit), key) do nothing;
+-- Neither collation nor operator class specifications are required --
+-- supplying them merely *limits* matches to indexes with matching opclasses
+-- used for relevant indexes
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit text_pattern_ops) do nothing;
+-- Okay, arbitrates using both index where text_pattern_ops opclass does and
+-- does not appear.
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key, fruit collate "C") do nothing;
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit collate "C" text_pattern_ops, key) do nothing;
+-- Okay, but only accepts the single index where both opclass and collation are
+-- specified (plus expression variant)
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C", key, key) do nothing;
+-- Attribute appears twice, while not all attributes/expressions on attributes
+-- appearing within index definition match in terms of both opclass and
+-- collation.
+--
+-- Works because every attribute in inference specification needs to be
+-- satisfied once or more by cataloged index attribute, and as always when an
+-- attribute in the cataloged definition has a non-default opclass/collation,
+-- it still satisfied some inference attribute lacking any particular
+-- opclass/collation specification.
+--
+-- The implementation is liberal in accepting inference specifications on the
+-- assumption that multiple inferred unique indexes will prevent problematic
+-- cases. It rolls with unique indexes where attributes redundantly appear
+-- multiple times, too (which is not tested here).
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (fruit, key, fruit text_pattern_ops, key) do nothing;
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (lower(fruit) collate "C" text_pattern_ops, key, key) do nothing;
+
+drop index op_index_key;
+drop index collation_index_key;
+drop index both_index_key;
+drop index both_index_expr_key;
+
+--
+-- Single key tests
+--
+create unique index key_index on insertconflicttest(key);
+
+--
+-- Explain tests
+--
+explain (costs off) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit;
+-- Should display qual actually attributable to internal sequential scan:
+explain (costs off) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit where insertconflicttest.fruit != 'Cawesh';
+-- With EXCLUDED.* expression in scan node:
+explain (costs off) insert into insertconflicttest values(0, 'Crowberry') on conflict (key) do update set fruit = excluded.fruit where excluded.fruit != 'Elderberry';
+-- Does the same, but JSON format shows "Conflict Arbiter Index" as JSON array:
+explain (costs off, format json) insert into insertconflicttest values (0, 'Bilberry') on conflict (key) do update set fruit = excluded.fruit where insertconflicttest.fruit != 'Lime' returning *;
+
+-- Fails (no unique index inference specification, required for do update variant):
+insert into insertconflicttest values (1, 'Apple') on conflict do update set fruit = excluded.fruit;
+
+-- inference succeeds:
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (2, 'Orange') on conflict (key, key, key) do update set fruit = excluded.fruit;
+
+-- Succeed, since multi-assignment does not involve subquery:
+insert into insertconflicttest
+values (1, 'Apple'), (2, 'Orange')
+on conflict (key) do update set (fruit, key) = (excluded.fruit, excluded.key);
+
+-- Give good diagnostic message when EXCLUDED.* spuriously referenced from
+-- RETURNING:
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do update set fruit = excluded.fruit RETURNING excluded.fruit;
+
+-- Only suggest
.* column when inference element misspelled:
+insert into insertconflicttest values (1, 'Apple') on conflict (keyy) do update set fruit = excluded.fruit;
+
+-- Have useful HINT for EXCLUDED.* RTE within UPDATE:
+insert into insertconflicttest values (1, 'Apple') on conflict (key) do update set fruit = excluded.fruitt;
+
+-- inference fails:
+insert into insertconflicttest values (3, 'Kiwi') on conflict (key, fruit) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (4, 'Mango') on conflict (fruit, key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (5, 'Lemon') on conflict (fruit) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (6, 'Passionfruit') on conflict (lower(fruit)) do update set fruit = excluded.fruit;
+
+-- Check the target relation can be aliased
+insert into insertconflicttest values (6, 'Passionfruits') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (key) do update set fruit = excluded.fruit; -- ok, no reference to target table
+insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (key) do update set fruit = ict.fruit; -- ok, alias
+insert into insertconflicttest AS ict values (6, 'Passionfruit') on conflict (key) do update set fruit = insertconflicttest.fruit; -- error, references aliased away name
+
+drop index key_index;
+
+--
+-- Composite key tests
+--
+create unique index comp_key_index on insertconflicttest(key, fruit);
+
+-- inference succeeds:
+insert into insertconflicttest values (7, 'Raspberry') on conflict (key, fruit) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (8, 'Lime') on conflict (fruit, key) do update set fruit = excluded.fruit;
+
+-- inference fails:
+insert into insertconflicttest values (9, 'Banana') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (10, 'Blueberry') on conflict (key, key, key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (11, 'Cherry') on conflict (key, lower(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (12, 'Date') on conflict (lower(fruit), key) do update set fruit = excluded.fruit;
+
+drop index comp_key_index;
+
+--
+-- Partial index tests, no inference predicate specificied
+--
+create unique index part_comp_key_index on insertconflicttest(key, fruit) where key < 5;
+create unique index expr_part_comp_key_index on insertconflicttest(key, lower(fruit)) where key < 5;
+
+-- inference fails:
+insert into insertconflicttest values (13, 'Grape') on conflict (key, fruit) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (14, 'Raisin') on conflict (fruit, key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (15, 'Cranberry') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (16, 'Melon') on conflict (key, key, key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (17, 'Mulberry') on conflict (key, lower(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (18, 'Pineapple') on conflict (lower(fruit), key) do update set fruit = excluded.fruit;
+
+drop index part_comp_key_index;
+drop index expr_part_comp_key_index;
+
+--
+-- Expression index tests
+--
+create unique index expr_key_index on insertconflicttest(lower(fruit));
+
+-- inference succeeds:
+insert into insertconflicttest values (20, 'Quince') on conflict (lower(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (21, 'Pomegranate') on conflict (lower(fruit), lower(fruit)) do update set fruit = excluded.fruit;
+
+-- inference fails:
+insert into insertconflicttest values (22, 'Apricot') on conflict (upper(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (23, 'Blackberry') on conflict (fruit) do update set fruit = excluded.fruit;
+
+drop index expr_key_index;
+
+--
+-- Expression index tests (with regular column)
+--
+create unique index expr_comp_key_index on insertconflicttest(key, lower(fruit));
+create unique index tricky_expr_comp_key_index on insertconflicttest(key, lower(fruit), upper(fruit));
+
+-- inference succeeds:
+insert into insertconflicttest values (24, 'Plum') on conflict (key, lower(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (25, 'Peach') on conflict (lower(fruit), key) do update set fruit = excluded.fruit;
+-- Should not infer "tricky_expr_comp_key_index" index:
+explain (costs off) insert into insertconflicttest values (26, 'Fig') on conflict (lower(fruit), key, lower(fruit), key) do update set fruit = excluded.fruit;
+
+-- inference fails:
+insert into insertconflicttest values (27, 'Prune') on conflict (key, upper(fruit)) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (28, 'Redcurrant') on conflict (fruit, key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (29, 'Nectarine') on conflict (key) do update set fruit = excluded.fruit;
+
+drop index expr_comp_key_index;
+drop index tricky_expr_comp_key_index;
+
+--
+-- Non-spurious duplicate violation tests
+--
+create unique index key_index on insertconflicttest(key);
+create unique index fruit_index on insertconflicttest(fruit);
+
+-- succeeds, since UPDATE happens to update "fruit" to existing value:
+insert into insertconflicttest values (26, 'Fig') on conflict (key) do update set fruit = excluded.fruit;
+-- fails, since UPDATE is to row with key value 26, and we're updating "fruit"
+-- to a value that happens to exist in another row ('peach'):
+insert into insertconflicttest values (26, 'Peach') on conflict (key) do update set fruit = excluded.fruit;
+-- succeeds, since "key" isn't repeated/referenced in UPDATE, and "fruit"
+-- arbitrates that statement updates existing "Fig" row:
+insert into insertconflicttest values (25, 'Fig') on conflict (fruit) do update set fruit = excluded.fruit;
+
+drop index key_index;
+drop index fruit_index;
+
+--
+-- Test partial unique index inference
+--
+create unique index partial_key_index on insertconflicttest(key) where fruit like '%berry';
+
+-- Succeeds
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) where fruit like '%berry' do update set fruit = excluded.fruit;
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) where fruit like '%berry' and fruit = 'inconsequential' do nothing;
+
+-- fails
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) do update set fruit = excluded.fruit;
+insert into insertconflicttest values (23, 'Blackberry') on conflict (key) where fruit like '%berry' or fruit = 'consequential' do nothing;
+insert into insertconflicttest values (23, 'Blackberry') on conflict (fruit) where fruit like '%berry' do update set fruit = excluded.fruit;
+
+drop index partial_key_index;
+
+-- Cleanup
+drop table insertconflicttest;
+
+-- ******************************************************************
+-- * *
+-- * Test inheritance (example taken from tutorial) *
+-- * *
+-- ******************************************************************
+create table cities (
+ name text,
+ population float8,
+ altitude int -- (in ft)
+);
+
+create table capitals (
+ state char(2)
+) inherits (cities);
+
+-- Create unique indexes. Due to a general limitation of inheritance,
+-- uniqueness is only enforced per-relation. Unique index inference
+-- specification will do the right thing, though.
+create unique index cities_names_unique on cities (name);
+create unique index capitals_names_unique on capitals (name);
+
+-- prepopulate the tables.
+insert into cities values ('San Francisco', 7.24E+5, 63);
+insert into cities values ('Las Vegas', 2.583E+5, 2174);
+insert into cities values ('Mariposa', 1200, 1953);
+
+insert into capitals values ('Sacramento', 3.694E+5, 30, 'CA');
+insert into capitals values ('Madison', 1.913E+5, 845, 'WI');
+
+-- Tests proper for inheritance:
+select * from capitals;
+
+-- Succeeds:
+insert into cities values ('Las Vegas', 2.583E+5, 2174) on conflict do nothing;
+insert into capitals values ('Sacramento', 4664.E+5, 30, 'CA') on conflict (name) do update set population = excluded.population;
+-- Wrong "Sacramento", so do nothing:
+insert into capitals values ('Sacramento', 50, 2267, 'NE') on conflict (name) do nothing;
+select * from capitals;
+insert into cities values ('Las Vegas', 5.83E+5, 2001) on conflict (name) do update set population = excluded.population, altitude = excluded.altitude;
+select tableoid::regclass, * from cities;
+insert into capitals values ('Las Vegas', 5.83E+5, 2222, 'NV') on conflict (name) do update set population = excluded.population;
+-- Capitals will contain new capital, Las Vegas:
+select * from capitals;
+-- Cities contains two instances of "Las Vegas", since unique constraints don't
+-- work across inheritance:
+select tableoid::regclass, * from cities;
+-- This only affects "cities" version of "Las Vegas":
+insert into cities values ('Las Vegas', 5.86E+5, 2223) on conflict (name) do update set population = excluded.population, altitude = excluded.altitude;
+select tableoid::regclass, * from cities;
+
+-- clean up
+drop table capitals;
+drop table cities;
+
+
+-- Make sure a table named excluded is handled properly
+create table excluded(key int primary key, data text);
+insert into excluded values(1, '1');
+-- error, ambiguous
+insert into excluded values(1, '2') on conflict (key) do update set data = excluded.data RETURNING *;
+-- ok, aliased
+insert into excluded AS target values(1, '2') on conflict (key) do update set data = excluded.data RETURNING *;
+-- ok, aliased
+insert into excluded AS target values(1, '2') on conflict (key) do update set data = target.data RETURNING *;
+-- make sure excluded isn't a problem in returning clause
+insert into excluded values(1, '2') on conflict (key) do update set data = 3 RETURNING excluded.*;
+
+-- clean up
+drop table excluded;
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
index f97a75a5fd..22b54a28c4 100644
--- a/src/test/regress/sql/privileges.sql
+++ b/src/test/regress/sql/privileges.sql
@@ -194,7 +194,7 @@ SELECT * FROM atestv2; -- fail (even though regressuser2 can access underlying a
-- Test column level permissions
SET SESSION AUTHORIZATION regressuser1;
-CREATE TABLE atest5 (one int, two int, three int);
+CREATE TABLE atest5 (one int, two int unique, three int, four int unique);
CREATE TABLE atest6 (one int, two int, blue int);
GRANT SELECT (one), INSERT (two), UPDATE (three) ON atest5 TO regressuser4;
GRANT ALL (one) ON atest5 TO regressuser3;
@@ -245,6 +245,23 @@ INSERT INTO atest5 VALUES (5,5,5); -- fail
UPDATE atest5 SET three = 10; -- ok
UPDATE atest5 SET one = 8; -- fail
UPDATE atest5 SET three = 5, one = 2; -- fail
+-- Check that column level privs are enforced in RETURNING
+-- Ok.
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = 10;
+-- Error. No SELECT on column three.
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = 10 RETURNING atest5.three;
+-- Ok. May SELECT on column "one":
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = 10 RETURNING atest5.one;
+-- Check that column level privileges are enforced for EXCLUDED
+-- Ok. we may select one
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLUDED.one;
+-- Error. No select rights on three
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set three = EXCLUDED.three;
+INSERT INTO atest5(two) VALUES (6) ON CONFLICT (two) DO UPDATE set one = 8; -- fails (due to UPDATE)
+INSERT INTO atest5(three) VALUES (4) ON CONFLICT (two) DO UPDATE set three = 10; -- fails (due to INSERT)
+-- Check that the the columns in the inference require select privileges
+-- Error. No privs on four
+INSERT INTO atest5(three) VALUES (4) ON CONFLICT (four) DO UPDATE set three = 10;
SET SESSION AUTHORIZATION regressuser1;
REVOKE ALL (one) ON atest5 FROM regressuser4;
diff --git a/src/test/regress/sql/returning.sql b/src/test/regress/sql/returning.sql
index 0ed9a48951..a460f82fb7 100644
--- a/src/test/regress/sql/returning.sql
+++ b/src/test/regress/sql/returning.sql
@@ -154,3 +154,9 @@ UPDATE joinview SET f1 = f1 + 1 WHERE f3 = 57 RETURNING *, other + 1;
SELECT * FROM joinview;
SELECT * FROM foo;
SELECT * FROM voo;
+
+-- Check aliased target relation
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING *; -- ok
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING foo.*; -- fails, wrong name
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING bar.*; -- ok
+INSERT INTO foo AS bar DEFAULT VALUES RETURNING bar.f3; -- ok
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
index 7d12dd00a2..e8c5932b20 100644
--- a/src/test/regress/sql/rowsecurity.sql
+++ b/src/test/regress/sql/rowsecurity.sql
@@ -511,6 +511,118 @@ DELETE FROM bv1 WHERE a = 6 AND f_leak(b);
SET SESSION AUTHORIZATION rls_regress_user0;
SELECT * FROM b1;
+--
+-- INSERT ... ON CONFLICT DO UPDATE and Row-level security
+--
+
+SET SESSION AUTHORIZATION rls_regress_user0;
+DROP POLICY p1 ON document;
+
+CREATE POLICY p1 ON document FOR SELECT USING (true);
+CREATE POLICY p2 ON document FOR INSERT WITH CHECK (dauthor = current_user);
+CREATE POLICY p3 ON document FOR UPDATE
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'))
+ WITH CHECK (dauthor = current_user);
+
+SET SESSION AUTHORIZATION rls_regress_user1;
+
+-- Exists...
+SELECT * FROM document WHERE did = 2;
+
+-- ...so violates actual WITH CHECK OPTION within UPDATE (not INSERT, since
+-- alternative UPDATE path happens to be taken):
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user2', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, dauthor = EXCLUDED.dauthor;
+
+-- Violates USING qual for UPDATE policy p3.
+--
+-- UPDATE path is taken, but UPDATE fails purely because *existing* row to be
+-- updated is not a "novel"/cid 11 (row is not leaked, even though we have
+-- SELECT privileges sufficient to see the row in this instance):
+INSERT INTO document VALUES (33, 22, 1, 'rls_regress_user1', 'okay science fiction'); -- preparation for next statement
+INSERT INTO document VALUES (33, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'Some novel, replaces sci-fi') -- takes UPDATE path
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle;
+-- Fine (we UPDATE, since INSERT WCOs and UPDATE security barrier quals + WCOs
+-- not violated):
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+-- Fine (we INSERT, so "cid = 33" ("technology") isn't evaluated):
+INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'some technology novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *;
+-- Fine (same query, but we UPDATE, so "cid = 33", ("technology") is not the
+-- case in respect of *existing* tuple):
+INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'some technology novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *;
+-- Same query a third time, but now fails due to existing tuple finally not
+-- passing quals:
+INSERT INTO document VALUES (78, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'some technology novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33 RETURNING *;
+-- Don't fail just because INSERT doesn't satisfy WITH CHECK option that
+-- originated as a barrier/USING() qual from the UPDATE. Note that the UPDATE
+-- path *isn't* taken, and so UPDATE-related policy does not apply:
+INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'technology book, can only insert')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+-- But this time, the same statement fails, because the UPDATE path is taken,
+-- and updating the row just inserted falls afoul of security barrier qual
+-- (enforced as WCO) -- what we might have updated target tuple to is
+-- irrelevant, in fact.
+INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'technology book, can only insert')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+
+-- Test default USING qual enforced as WCO
+SET SESSION AUTHORIZATION rls_regress_user0;
+DROP POLICY p1 ON document;
+DROP POLICY p2 ON document;
+DROP POLICY p3 ON document;
+
+CREATE POLICY p3_with_default ON document FOR UPDATE
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'));
+
+SET SESSION AUTHORIZATION rls_regress_user1;
+-- Just because WCO-style enforcement of USING quals occurs with
+-- existing/target tuple does not mean that the implementation can be allowed
+-- to fail to also enforce this qual against the final tuple appended to
+-- relation (since in the absence of an explicit WCO, this is also interpreted
+-- as an UPDATE/ALL WCO in general).
+--
+-- UPDATE path is taken here (fails due to existing tuple). Note that this is
+-- not reported as a "USING expression", because it's an RLS UPDATE check that originated as
+-- a USING qual for the purposes of RLS in general, as opposed to an explicit
+-- USING qual that is ordinarily a security barrier. We leave it up to the
+-- UPDATE to make this fail:
+INSERT INTO document VALUES (79, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'technology book, can only insert')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle RETURNING *;
+
+-- UPDATE path is taken here. Existing tuple passes, since it's cid
+-- corresponds to "novel", but default USING qual is enforced against
+-- post-UPDATE tuple too (as always when updating with a policy that lacks an
+-- explicit WCO), and so this fails:
+INSERT INTO document VALUES (2, (SELECT cid from category WHERE cname = 'technology'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET cid = EXCLUDED.cid, dtitle = EXCLUDED.dtitle RETURNING *;
+
+SET SESSION AUTHORIZATION rls_regress_user0;
+DROP POLICY p3_with_default ON document;
+
+--
+-- Test ALL policies with ON CONFLICT DO UPDATE (much the same as existing UPDATE
+-- tests)
+--
+CREATE POLICY p3_with_all ON document FOR ALL
+ USING (cid = (SELECT cid from category WHERE cname = 'novel'))
+ WITH CHECK (dauthor = current_user);
+
+SET SESSION AUTHORIZATION rls_regress_user1;
+
+-- Fails, since ALL WCO is enforced in insert path:
+INSERT INTO document VALUES (80, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user2', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle, cid = 33;
+-- Fails, since ALL policy USING qual is enforced (existing, target tuple is in
+-- violation, since it has the "manga" cid):
+INSERT INTO document VALUES (4, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dtitle = EXCLUDED.dtitle;
+-- Fails, since ALL WCO are enforced:
+INSERT INTO document VALUES (1, (SELECT cid from category WHERE cname = 'novel'), 1, 'rls_regress_user1', 'my first novel')
+ ON CONFLICT (did) DO UPDATE SET dauthor = 'rls_regress_user2';
--
-- ROLE/GROUP
diff --git a/src/test/regress/sql/rules.sql b/src/test/regress/sql/rules.sql
index c385e41457..6f1a1b84e7 100644
--- a/src/test/regress/sql/rules.sql
+++ b/src/test/regress/sql/rules.sql
@@ -680,6 +680,9 @@ SELECT * FROM shoelace_log ORDER BY sl_name;
insert into shoelace values ('sl9', 0, 'pink', 35.0, 'inch', 0.0);
insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0);
+-- Unsupported (even though a similar updatable view construct is)
+insert into shoelace values ('sl10', 1000, 'magenta', 40.0, 'inch', 0.0)
+ on conflict do nothing;
SELECT * FROM shoelace_obsolete ORDER BY sl_len_cm;
SELECT * FROM shoelace_candelete;
@@ -844,6 +847,17 @@ insert into rule_and_refint_t3 values (1, 12, 11, 'row3');
insert into rule_and_refint_t3 values (1, 12, 12, 'row4');
insert into rule_and_refint_t3 values (1, 11, 13, 'row5');
insert into rule_and_refint_t3 values (1, 13, 11, 'row6');
+-- Ordinary table
+insert into rule_and_refint_t3 values (1, 13, 11, 'row6')
+ on conflict do nothing;
+-- rule not fired, so fk violation
+insert into rule_and_refint_t3 values (1, 13, 11, 'row6')
+ on conflict (id3a, id3b, id3c) do update
+ set id3b = excluded.id3b;
+-- rule fired, so unsupported
+insert into shoelace values ('sl9', 0, 'pink', 35.0, 'inch', 0.0)
+ on conflict (sl_name) do update
+ set sl_avail = excluded.sl_avail;
create rule rule_and_refint_t3_ins as on insert to rule_and_refint_t3
where (exists (select 1 from rule_and_refint_t3
@@ -1025,3 +1039,48 @@ drop view rule_v1;
create view rule_v1(x) as select * from (values(1,2)) v(q,w);
\d+ rule_v1
drop view rule_v1;
+
+--
+-- Check DO INSTEAD rules with ON CONFLICT
+--
+CREATE TABLE hats (
+ hat_name char(10) primary key,
+ hat_color char(10) -- hat color
+);
+
+CREATE TABLE hat_data (
+ hat_name char(10) primary key,
+ hat_color char(10) -- hat color
+);
+
+-- okay
+CREATE RULE hat_nosert AS ON INSERT TO hats
+ DO INSTEAD
+ INSERT INTO hat_data VALUES (
+ NEW.hat_name,
+ NEW.hat_color)
+ ON CONFLICT (hat_name) DO NOTHING RETURNING *;
+
+-- Works (projects row)
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+-- Works (does nothing)
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+SELECT tablename, rulename, definition FROM pg_rules
+ WHERE tablename = 'hats';
+DROP RULE hat_nosert ON hats;
+
+CREATE RULE hat_upsert AS ON INSERT TO hats
+ DO INSTEAD
+ INSERT INTO hat_data VALUES (
+ NEW.hat_name,
+ NEW.hat_color)
+ ON CONFLICT (hat_name) DO UPDATE SET hat_color = 'Orange' RETURNING *;
+
+-- Works (does upsert)
+INSERT INTO hats VALUES ('h7', 'black') RETURNING *;
+SELECT tablename, rulename, definition FROM pg_rules
+ WHERE tablename = 'hats';
+DROP RULE hat_upsert ON hats;
+
+drop table hats;
+drop table hat_data;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 4be2e40a00..2991223089 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -373,6 +373,20 @@ from
join
int4_tbl i4 on dummy = i4.f1;
+--
+-- Test case for subselect within UPDATE of INSERT...ON CONFLICT DO UPDATE
+--
+create temp table upsert(key int4 primary key, val text);
+insert into upsert values(1, 'val') on conflict (key) do update set val = 'not seen';
+insert into upsert values(1, 'val') on conflict (key) do update set val = 'seen with subselect ' || (select f1 from int4_tbl where f1 != 0 limit 1)::text;
+
+select * from upsert;
+
+with aa as (select 'int4_tbl' u from int4_tbl limit 1)
+insert into upsert values (1, 'x'), (999, 'y')
+on conflict (key) do update set val = (select u from aa)
+returning *;
+
--
-- Test case for cross-type partial matching in hashed subplan (bug #7597)
--
diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql
index 0ea2c314de..9f66702cee 100644
--- a/src/test/regress/sql/triggers.sql
+++ b/src/test/regress/sql/triggers.sql
@@ -208,7 +208,7 @@ drop sequence ttdummy_seq;
CREATE TABLE log_table (tstamp timestamp default timeofday()::timestamp);
-CREATE TABLE main_table (a int, b int);
+CREATE TABLE main_table (a int unique, b int);
COPY main_table (a,b) FROM stdin;
5 10
@@ -237,6 +237,12 @@ FOR EACH STATEMENT EXECUTE PROCEDURE trigger_func('after_ins_stmt');
CREATE TRIGGER after_upd_stmt_trig AFTER UPDATE ON main_table
EXECUTE PROCEDURE trigger_func('after_upd_stmt');
+-- Both insert and update statement level triggers (before and after) should
+-- fire. Doesn't fire UPDATE before trigger, but only because one isn't
+-- defined.
+INSERT INTO main_table (a, b) VALUES (5, 10) ON CONFLICT (a)
+ DO UPDATE SET b = EXCLUDED.b;
+
CREATE TRIGGER after_upd_row_trig AFTER UPDATE ON main_table
FOR EACH ROW EXECUTE PROCEDURE trigger_func('after_upd_row');
@@ -246,6 +252,9 @@ UPDATE main_table SET a = a + 1 WHERE b < 30;
-- UPDATE that effects zero rows should still call per-statement trigger
UPDATE main_table SET a = a + 2 WHERE b > 100;
+-- constraint now unneeded
+ALTER TABLE main_table DROP CONSTRAINT main_table_a_key;
+
-- COPY should fire per-row and per-statement INSERT triggers
COPY main_table (a, b) FROM stdin;
30 40
@@ -1173,3 +1182,61 @@ select * from self_ref_trigger;
drop table self_ref_trigger;
drop function self_ref_trigger_ins_func();
drop function self_ref_trigger_del_func();
+
+--
+-- Verify behavior of before and after triggers with INSERT...ON CONFLICT
+-- DO UPDATE
+--
+create table upsert (key int4 primary key, color text);
+
+create function upsert_before_func()
+ returns trigger language plpgsql as
+$$
+begin
+ if (TG_OP = 'UPDATE') then
+ raise warning 'before update (old): %', old.*::text;
+ raise warning 'before update (new): %', new.*::text;
+ elsif (TG_OP = 'INSERT') then
+ raise warning 'before insert (new): %', new.*::text;
+ if new.key % 2 = 0 then
+ new.key := new.key + 1;
+ new.color := new.color || ' trig modified';
+ raise warning 'before insert (new, modified): %', new.*::text;
+ end if;
+ end if;
+ return new;
+end;
+$$;
+create trigger upsert_before_trig before insert or update on upsert
+ for each row execute procedure upsert_before_func();
+
+create function upsert_after_func()
+ returns trigger language plpgsql as
+$$
+begin
+ if (TG_OP = 'UPDATE') then
+ raise warning 'after update (old): %', new.*::text;
+ raise warning 'after update (new): %', new.*::text;
+ elsif (TG_OP = 'INSERT') then
+ raise warning 'after insert (new): %', new.*::text;
+ end if;
+ return null;
+end;
+$$;
+create trigger upsert_after_trig after insert or update on upsert
+ for each row execute procedure upsert_after_func();
+
+insert into upsert values(1, 'black') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(2, 'red') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(3, 'orange') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(4, 'green') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(5, 'purple') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(6, 'white') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(7, 'pink') on conflict (key) do update set color = 'updated ' || upsert.color;
+insert into upsert values(8, 'yellow') on conflict (key) do update set color = 'updated ' || upsert.color;
+
+select * from upsert;
+
+drop table upsert;
+drop function upsert_before_func();
+drop function upsert_after_func();
diff --git a/src/test/regress/sql/updatable_views.sql b/src/test/regress/sql/updatable_views.sql
index 697363665c..8fe96f5c51 100644
--- a/src/test/regress/sql/updatable_views.sql
+++ b/src/test/regress/sql/updatable_views.sql
@@ -69,6 +69,15 @@ DELETE FROM rw_view14 WHERE a=3; -- should be OK
-- Partially updatable view
INSERT INTO rw_view15 VALUES (3, 'ROW 3'); -- should fail
INSERT INTO rw_view15 (a) VALUES (3); -- should be OK
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT DO NOTHING; -- succeeds
+SELECT * FROM rw_view15;
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO NOTHING; -- succeeds
+SELECT * FROM rw_view15;
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO UPDATE set a = excluded.a; -- succeeds
+SELECT * FROM rw_view15;
+INSERT INTO rw_view15 (a) VALUES (3) ON CONFLICT (a) DO UPDATE set upper = 'blarg'; -- fails
+SELECT * FROM rw_view15;
+SELECT * FROM rw_view15;
ALTER VIEW rw_view15 ALTER COLUMN upper SET DEFAULT 'NOT SET';
INSERT INTO rw_view15 (a) VALUES (4); -- should fail
UPDATE rw_view15 SET upper='ROW 3' WHERE a=3; -- should fail
diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql
index e71128c04d..5637c68acf 100644
--- a/src/test/regress/sql/update.sql
+++ b/src/test/regress/sql/update.sql
@@ -8,6 +8,11 @@ CREATE TABLE update_test (
c TEXT
);
+CREATE TABLE upsert_test (
+ a INT PRIMARY KEY,
+ b TEXT
+);
+
INSERT INTO update_test VALUES (5, 10, 'foo');
INSERT INTO update_test(b, a) VALUES (15, 10);
@@ -74,4 +79,20 @@ UPDATE update_test AS t SET b = update_test.b + 10 WHERE t.a = 10;
UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car';
SELECT a, b, char_length(c) FROM update_test;
+-- Test ON CONFLICT DO UPDATE
+INSERT INTO upsert_test VALUES(1, 'Boo');
+-- uncorrelated sub-select:
+WITH aaa AS (SELECT 1 AS a, 'Foo' AS b) INSERT INTO upsert_test
+ VALUES (1, 'Bar') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b, a FROM aaa) RETURNING *;
+-- correlated sub-select:
+INSERT INTO upsert_test VALUES (1, 'Baz') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Correlated', a from upsert_test i WHERE i.a = upsert_test.a)
+ RETURNING *;
+-- correlated sub-select (EXCLUDED.* alias):
+INSERT INTO upsert_test VALUES (1, 'Bat') ON CONFLICT(a)
+ DO UPDATE SET (b, a) = (SELECT b || ', Excluded', a from upsert_test i WHERE i.a = excluded.a)
+ RETURNING *;
+
DROP TABLE update_test;
+DROP TABLE upsert_test;
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 1687c11983..3fd55f96b3 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -795,6 +795,63 @@ SELECT * FROM t LIMIT 10;
SELECT * FROM y;
+-- data-modifying WITH containing INSERT...ON CONFLICT DO UPDATE
+CREATE TABLE z AS SELECT i AS k, (i || ' v')::text v FROM generate_series(1, 16, 3) i;
+ALTER TABLE z ADD UNIQUE (k);
+
+WITH t AS (
+ INSERT INTO z SELECT i, 'insert'
+ FROM generate_series(0, 16) i
+ ON CONFLICT (k) DO UPDATE SET v = z.v || ', now update'
+ RETURNING *
+)
+SELECT * FROM t JOIN y ON t.k = y.a ORDER BY a, k;
+
+-- Test EXCLUDED.* reference within CTE
+WITH aa AS (
+ INSERT INTO z VALUES(1, 5) ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v
+ WHERE z.k != EXCLUDED.k
+ RETURNING *
+)
+SELECT * FROM aa;
+
+-- New query/snapshot demonstrates side-effects of previous query.
+SELECT * FROM z ORDER BY k;
+
+--
+-- Ensure subqueries within the update clause work, even if they
+-- reference outside values
+--
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 1 LIMIT 1);
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = ' update' WHERE z.k = (SELECT a FROM aa);
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 1 LIMIT 1);
+WITH aa AS (SELECT 'a' a, 'b' b UNION ALL SELECT 'a' a, 'b' b)
+INSERT INTO z VALUES(1, 'insert')
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 'a' LIMIT 1);
+WITH aa AS (SELECT 1 a, 2 b)
+INSERT INTO z VALUES(1, (SELECT b || ' insert' FROM aa WHERE a = 1 ))
+ON CONFLICT (k) DO UPDATE SET v = (SELECT b || ' update' FROM aa WHERE a = 1 LIMIT 1);
+
+-- This shows an attempt to update an invisible row, which should really be
+-- reported as a cardinality violation, but it doesn't seem worth fixing:
+WITH simpletup AS (
+ SELECT 2 k, 'Green' v),
+upsert_cte AS (
+ INSERT INTO z VALUES(2, 'Blue') ON CONFLICT (k) DO
+ UPDATE SET (k, v) = (SELECT k, v FROM simpletup WHERE simpletup.k = z.k)
+ RETURNING k, v)
+INSERT INTO z VALUES(2, 'Red') ON CONFLICT (k) DO
+UPDATE SET (k, v) = (SELECT k, v FROM upsert_cte WHERE upsert_cte.k = z.k)
+RETURNING k, v;
+
+DROP TABLE z;
+
-- check that run to completion happens in proper ordering
TRUNCATE TABLE y;
--
cgit v1.2.3
From 1a8a4e5cde2b7755e11bde2ea7897bd650622d3e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 10 May 2015 14:36:30 -0400
Subject: Code review for foreign/custom join pushdown patch.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 included some design
decisions that seem pretty questionable to me, and there was quite a lot
of stuff not to like about the documentation and comments. Clean up
as follows:
* Consider foreign joins only between foreign tables on the same server,
rather than between any two foreign tables with the same underlying FDW
handler function. In most if not all cases, the FDW would simply have had
to apply the same-server restriction itself (far more expensively, both for
lack of caching and because it would be repeated for each combination of
input sub-joins), or else risk nasty bugs. Anyone who's really intent on
doing something outside this restriction can always use the
set_join_pathlist_hook.
* Rename fdw_ps_tlist/custom_ps_tlist to fdw_scan_tlist/custom_scan_tlist
to better reflect what they're for, and allow these custom scan tlists
to be used even for base relations.
* Change make_foreignscan() API to include passing the fdw_scan_tlist
value, since the FDW is required to set that. Backwards compatibility
doesn't seem like an adequate reason to expect FDWs to set it in some
ad-hoc extra step, and anyway existing FDWs can just pass NIL.
* Change the API of path-generating subroutines of add_paths_to_joinrel,
and in particular that of GetForeignJoinPaths and set_join_pathlist_hook,
so that various less-used parameters are passed in a struct rather than
as separate parameter-list entries. The objective here is to reduce the
probability that future additions to those parameter lists will result in
source-level API breaks for users of these hooks. It's possible that this
is even a small win for the core code, since most CPU architectures can't
pass more than half a dozen parameters efficiently anyway. I kept root,
joinrel, outerrel, innerrel, and jointype as separate parameters to reduce
code churn in joinpath.c --- in particular, putting jointype into the
struct would have been problematic because of the subroutines' habit of
changing their local copies of that variable.
* Avoid ad-hocery in ExecAssignScanProjectionInfo. It was probably all
right for it to know about IndexOnlyScan, but if the list is to grow
we should refactor the knowledge out to the callers.
* Restore nodeForeignscan.c's previous use of the relcache to avoid
extra GetFdwRoutine lookups for base-relation scans.
* Lots of cleanup of documentation and missed comments. Re-order some
code additions into more logical places.
---
contrib/file_fdw/file_fdw.c | 3 +-
contrib/postgres_fdw/postgres_fdw.c | 3 +-
doc/src/sgml/custom-scan.sgml | 134 ++++++------
doc/src/sgml/fdwhandler.sgml | 152 +++++++++-----
src/backend/commands/explain.c | 4 +-
src/backend/executor/execScan.c | 23 +--
src/backend/executor/nodeCustom.c | 55 +++--
src/backend/executor/nodeForeignscan.c | 41 ++--
src/backend/executor/nodeIndexonlyscan.c | 6 +-
src/backend/foreign/foreign.c | 42 ++--
src/backend/nodes/copyfuncs.c | 8 +-
src/backend/nodes/outfuncs.c | 9 +-
src/backend/optimizer/path/joinpath.c | 342 +++++++++++--------------------
src/backend/optimizer/plan/createplan.c | 83 +++-----
src/backend/optimizer/plan/setrefs.c | 256 ++++++++++++-----------
src/backend/optimizer/plan/subselect.c | 2 +
src/backend/optimizer/util/plancat.c | 7 +-
src/backend/optimizer/util/relnode.c | 27 ++-
src/backend/utils/adt/ruleutils.c | 19 +-
src/include/executor/executor.h | 1 +
src/include/foreign/fdwapi.h | 27 ++-
src/include/nodes/plannodes.h | 37 +++-
src/include/nodes/primnodes.h | 10 +-
src/include/nodes/relation.h | 45 +++-
src/include/optimizer/paths.h | 14 +-
src/include/optimizer/planmain.h | 4 +-
26 files changed, 701 insertions(+), 653 deletions(-)
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 4368897581..4c56444751 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -561,7 +561,8 @@ fileGetForeignPlan(PlannerInfo *root,
scan_clauses,
scan_relid,
NIL, /* no expressions to evaluate */
- best_path->fdw_private);
+ best_path->fdw_private,
+ NIL /* no custom tlist */ );
}
/*
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 173b4f06e6..ec89b25f61 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -872,7 +872,8 @@ postgresGetForeignPlan(PlannerInfo *root,
local_exprs,
scan_relid,
params_list,
- fdw_private);
+ fdw_private,
+ NIL /* no custom tlist */ );
}
/*
diff --git a/doc/src/sgml/custom-scan.sgml b/doc/src/sgml/custom-scan.sgml
index 9fd1db6fde..62a8a3305b 100644
--- a/doc/src/sgml/custom-scan.sgml
+++ b/doc/src/sgml/custom-scan.sgml
@@ -32,12 +32,13 @@
- Implementing Custom Paths
+ Creating Custom Scan Paths
- A custom scan provider will typically add paths by setting the following
- hook, which is called after the core code has generated what it believes
- to be the complete and correct set of access paths for the relation.
+ A custom scan provider will typically add paths for a base relation by
+ setting the following hook, which is called after the core code has
+ generated what it believes to be the complete and correct set of access
+ paths for the relation.
typedef void (*set_rel_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *rel,
@@ -74,7 +75,7 @@ typedef struct CustomPath
can support mark and restore. Both capabilities are optional.
custom_private> can be used to store the custom path's
private data. Private data should be stored in a form that can be handled
- by nodeToString>, so that debugging routines which attempt to
+ by nodeToString>, so that debugging routines that attempt to
print the custom path will work as designed. methods> must
point to a (usually statically allocated) object implementing the required
custom path methods, of which there are currently only two, as further
@@ -82,29 +83,28 @@ typedef struct CustomPath
- A custom scan provider can also add join paths; in this case, the scan
- must produce the same output as would normally be produced by the join
- it replaces. To do this, the join provider should set the following hook.
- This hook may be invoked repeatedly for the same pair of relations, with
- different combinations of inner and outer relations; it is the
- responsibility of the hook to minimize duplicated work.
+ A custom scan provider can also provide join paths. Just as for base
+ relations, such a path must produce the same output as would normally be
+ produced by the join it replaces. To do this, the join provider should
+ set the following hook, and then within the hook function,
+ create CustomPath> path(s) for the join relation.
typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
- List *restrictlist,
JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels);
+ JoinPathExtraData *extra);
extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook;
+
+ This hook will be invoked repeatedly for the same join relation, with
+ different combinations of inner and outer relations; it is the
+ responsibility of the hook to minimize duplicated work.
- Custom Path Callbacks
+ Custom Scan Path Callbacks
@@ -125,7 +125,7 @@ void (*TextOutCustomPath) (StringInfo str,
const CustomPath *node);
Generate additional output when nodeToString> is invoked on
- this custom path. This callback is optional. Since
+ this custom path. This callback is optional. Since
nodeToString> will automatically dump all fields in the
structure that it can see, including custom_private>, this
is only useful if the CustomPath> is actually embedded in a
@@ -135,7 +135,7 @@ void (*TextOutCustomPath) (StringInfo str,
- Implementing Custom Plans
+ Creating Custom Scan Plans
A custom scan is represented in a finished plan tree using the following
@@ -146,9 +146,9 @@ typedef struct CustomScan
Scan scan;
uint32 flags;
List *custom_exprs;
- List *custom_ps_tlist;
List *custom_private;
- List *custom_relids;
+ List *custom_scan_tlist;
+ Bitmapset *custom_relids;
const CustomScanMethods *methods;
} CustomScan;
@@ -158,16 +158,21 @@ typedef struct CustomScan
scan> must be initialized as for any other scan, including
estimated costs, target lists, qualifications, and so on.
flags> is a bitmask with the same meaning as in
- CustomPath>. custom_exprs> should be used to
+ CustomPath>.
+ custom_exprs> should be used to
store expression trees that will need to be fixed up by
setrefs.c> and subselect.c>, while
- custom_private> should be used to store other private data that
- is only used by the custom scan provider itself. Plan trees must be able
- to be duplicated using copyObject>, so all the data stored
- within these two fields must consist of nodes that function can handle.
- custom_relids> is set by the core code to the set of relations
- which this scan node must handle; except when this scan is replacing a
- join, it will have only one member.
+ custom_private> should be used to store other private data
+ that is only used by the custom scan provider itself.
+ custom_scan_tlist> can be NIL when scanning a base
+ relation, indicating that the custom scan returns scan tuples that match
+ the base relation's rowtype. Otherwise it is a targetlist describing
+ the actual scan tuples. custom_scan_tlist> must be
+ provided for joins, and could be provided for scans if the custom scan
+ provider can compute some non-Var expressions.
+ custom_relids> is set by the core code to the set of
+ relations (rangetable indexes) that this scan node handles; except when
+ this scan is replacing a join, it will have only one member.
methods> must point to a (usually statically allocated)
object implementing the required custom scan methods, which are further
detailed below.
@@ -175,19 +180,22 @@ typedef struct CustomScan
When a CustomScan> scans a single relation,
- scan.scanrelid> should be the range table index of the table
- to be scanned, and custom_ps_tlist> should be
- NULL>. When it replaces a join, scan.scanrelid>
- should be zero, and custom_ps_tlist> should be a list of
- TargetEntry> nodes. This is necessary because, when a join
- is replaced, the target list cannot be constructed from the table
- definition. At execution time, this list will be used to initialize the
- tuple descriptor of the TupleTableSlot>. It will also be
- used by EXPLAIN>, when deparsing.
+ scan.scanrelid> must be the range table index of the table
+ to be scanned. When it replaces a join, scan.scanrelid>
+ should be zero.
+
+
+
+ Plan trees must be able to be duplicated using copyObject>,
+ so all the data stored within the custom> fields must consist of
+ nodes that that function can handle. Furthermore, custom scan providers
+ cannot substitute a larger structure that embeds
+ a CustomScan> for the structure itself, as would be possible
+ for a CustomPath> or CustomScanState>.
- Custom Scan Callbacks
+ Custom Scan Plan Callbacks
Node *(*CreateCustomScanState) (CustomScan *cscan);
@@ -195,12 +203,12 @@ Node *(*CreateCustomScanState) (CustomScan *cscan);
Allocate a CustomScanState> for this
CustomScan>. The actual allocation will often be larger than
required for an ordinary CustomScanState>, because many
- scan types will wish to embed that as the first field of a large structure.
+ providers will wish to embed that as the first field of a larger structure.
The value returned must have the node tag and methods>
- set appropriately, but the other fields need not be initialized at this
+ set appropriately, but other fields should be left as zeroes at this
stage; after ExecInitCustomScan> performs basic initialization,
the BeginCustomScan> callback will be invoked to give the
- custom scan state a chance to do whatever else is needed.
+ custom scan provider a chance to do whatever else is needed.
@@ -209,23 +217,21 @@ void (*TextOutCustomScan) (StringInfo str,
const CustomScan *node);
Generate additional output when nodeToString> is invoked on
- this custom plan. This callback is optional. Since a
- CustomScan> must be copyable by copyObject>,
- custom scan providers cannot substitute a larger structure that embeds a
- CustomScan> for the structure itself, as would be possible
- for a CustomPath> or CustomScanState>.
- Therefore, providing this callback is unlikely to be useful.
+ this custom plan node. This callback is optional. Since
+ nodeToString> will automatically dump all fields in the
+ structure, including the substructure of the custom> fields,
+ there is usually not much need for this callback.
-
- Implementing Custom Scans
+
+ Executing Custom Scans
When a CustomScan> is executed, its execution state is
represented by a CustomScanState>, which is declared as
- follows.
+ follows:
typedef struct CustomScanState
{
@@ -237,7 +243,9 @@ typedef struct CustomScanState
- ss> must be initialized as for any other scanstate;
+ ss> is initialized as for any other scanstate,
+ except that if the scan is for a join rather than a base relation,
+ ss.ss_currentRelation> is left NULL.
flags> is a bitmask with the same meaning as in
CustomPath> and CustomScan>.
methods> must point to a (usually statically allocated)
@@ -247,8 +255,8 @@ typedef struct CustomScanState
structure embedding the above as its first member.
-
- Custom Execution-Time Callbacks
+
+ Custom Scan Execution Callbacks
@@ -257,8 +265,8 @@ void (*BeginCustomScan) (CustomScanState *node,
int eflags);
Complete initialization of the supplied CustomScanState>.
- Some initialization is performed by ExecInitCustomScan>, but
- any private fields should be initialized here.
+ Standard fields have been initialized by ExecInitCustomScan>,
+ but any private fields should be initialized here.
@@ -276,8 +284,8 @@ TupleTableSlot *(*ExecCustomScan) (CustomScanState *node);
void (*EndCustomScan) (CustomScanState *node);
Clean up any private data associated with the CustomScanState>.
- This method is required, but may not need to do anything if the associated
- data does not exist or will be cleaned up automatically.
+ This method is required, but it does not need to do anything if there is
+ no associated data or it will be cleaned up automatically.
@@ -293,8 +301,8 @@ void (*ReScanCustomScan) (CustomScanState *node);
void (*MarkPosCustomScan) (CustomScanState *node);
Save the current scan position so that it can subsequently be restored
- by the RestrPosCustomScan> callback. This calback is optional,
- and need only be supplied if
+ by the RestrPosCustomScan> callback. This callback is
+ optional, and need only be supplied if the
CUSTOMPATH_SUPPORT_MARK_RESTORE> flag is set.
@@ -304,7 +312,7 @@ void (*RestrPosCustomScan) (CustomScanState *node);
Restore the previous scan position as saved by the
MarkPosCustomScan> callback. This callback is optional,
- and need only be supplied if
+ and need only be supplied if the
CUSTOMPATH_SUPPORT_MARK_RESTORE> flag is set.
@@ -314,8 +322,8 @@ void (*ExplainCustomScan) (CustomScanState *node,
List *ancestors,
ExplainState *es);
- Output additional information on EXPLAIN> that involves
- custom-scan node. This callback is optional. Common data stored in the
+ Output additional information for EXPLAIN> of a custom-scan
+ plan node. This callback is optional. Common data stored in the
ScanState>, such as the target list and scan relation, will
be shown even without this callback, but the callback allows the display
of additional, private state.
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index bc06d2cbb2..33863f04f8 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -175,8 +175,11 @@ GetForeignPlan (PlannerInfo *root,
access path. This is called at the end of query planning.
The parameters are as for GetForeignRelSize>, plus
the selected ForeignPath> (previously produced by
- GetForeignPaths>), the target list to be emitted by the
- plan node, and the restriction clauses to be enforced by the plan node.
+ GetForeignPaths> or GetForeignJoinPaths>),
+ the target list to be emitted by the plan node,
+ and the restriction clauses to be enforced by the plan node.
+ (If the path is for a join rather than a base
+ relation, foreigntableid> is InvalidOid>.)
@@ -235,9 +238,12 @@ IterateForeignScan (ForeignScanState *node);
- The rows returned must match the column signature of the foreign table
- being scanned. If you choose to optimize away fetching columns that
- are not needed, you should insert nulls in those column positions.
+ The rows returned must match the fdw_scan_tlist> target
+ list if one was supplied, otherwise they must match the rowtype of the
+ foreign table being scanned. If you choose to optimize away fetching
+ columns that are not needed, you should insert nulls in those column
+ positions, or else generate a fdw_scan_tlist> list with
+ those columns omitted.
@@ -275,6 +281,67 @@ EndForeignScan (ForeignScanState *node);
+
+ FDW Routines For Scanning Foreign Joins
+
+
+ If an FDW supports performing foreign joins remotely (rather than
+ by fetching both tables' data and doing the join locally), it should
+ provide this callback function:
+
+
+
+
+void
+GetForeignJoinPaths (PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
+
+ Create possible access paths for a join of two (or more) foreign tables
+ that all belong to the same foreign server. This optional
+ function is called during query planning. As
+ with GetForeignPaths>, this function should
+ generate ForeignPath> path(s) for the
+ supplied joinrel>, and call add_path> to add these
+ paths to the set of paths considered for the join. But unlike
+ GetForeignPaths>, it is not necessary that this function
+ succeed in creating at least one path, since paths involving local
+ joining are always possible.
+
+
+
+ Note that this function will be invoked repeatedly for the same join
+ relation, with different combinations of inner and outer relations; it is
+ the responsibility of the FDW to minimize duplicated work.
+
+
+
+ If a ForeignPath> path is chosen for the join, it will
+ represent the entire join process; paths generated for the component
+ tables and subsidiary joins will not be used. Subsequent processing of
+ the join path proceeds much as it does for a path scanning a single
+ foreign table. One difference is that the scanrelid> of
+ the resulting ForeignScan> plan node should be set to zero,
+ since there is no single relation that it represents; instead,
+ the fs_relids> field of the ForeignScan>
+ node represents the set of relations that were joined. (The latter field
+ is set up automatically by the core planner code, and need not be filled
+ by the FDW.) Another difference is that, because the column list for a
+ remote join cannot be found from the system catalogs, the FDW must
+ fill fdw_scan_tlist> with an appropriate list
+ of TargetEntry> nodes, representing the set of columns
+ it will supply at runtime in the tuples it returns.
+
+
+
+ See for additional information.
+
+
+
+
FDW Routines For Updating Foreign Tables
@@ -598,42 +665,6 @@ IsForeignRelUpdatable (Relation rel);
-
- FDW Routines For Remote Joins
-
-
-void
-GetForeignJoinPaths(PlannerInfo *root,
- RelOptInfo *joinrel,
- RelOptInfo *outerrel,
- RelOptInfo *innerrel,
- List *restrictlist,
- JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels);
-
- Create possible access paths for a join of two foreign tables managed
- by the same foreign data wrapper.
- This optional function is called during query planning.
-
-
- This function the FDW to add ForeignScan> paths for the
- supplied joinrel>. Typically, the FDW will send the whole
- join to the remote server as a single query, as performing the join
- remotely rather than locally is typically much more efficient.
-
-
- Since we cannot construct the slot descriptor for a remote join from
- the catalogs, the FDW should set the scanrelid> of the
- ForeignScan> to zero and fdw_ps_tlist>
- to an appropriate list of TargetEntry> nodes.
- Junk entries will be ignored, but can be present for the benefit of
- deparsing performed by EXPLAIN>.
-
-
-
FDW Routines for EXPLAIN>
@@ -904,10 +935,10 @@ GetForeignServerByName(const char *name, bool missing_ok);
The FDW callback functions GetForeignRelSize>,
- GetForeignPaths>, GetForeignPlan>, and
- PlanForeignModify> must fit into the workings of the
- PostgreSQL> planner. Here are some notes about what
- they must do.
+ GetForeignPaths>, GetForeignPlan>,
+ PlanForeignModify>, and GetForeignJoinPaths>
+ must fit into the workings of the PostgreSQL> planner.
+ Here are some notes about what they must do.
@@ -934,7 +965,7 @@ GetForeignServerByName(const char *name, bool missing_ok);
baserel->fdw_private> is a void> pointer that is
available for FDW planning functions to store information relevant to
the particular foreign table. The core planner does not touch it except
- to initialize it to NULL when the baserel> node is created.
+ to initialize it to NULL when the RelOptInfo> node is created.
It is useful for passing information forward from
GetForeignRelSize> to GetForeignPaths> and/or
GetForeignPaths> to GetForeignPlan>, thereby
@@ -1002,6 +1033,23 @@ GetForeignServerByName(const char *name, bool missing_ok);
evaluation of the fdw_exprs> expression tree.
+
+ Another ForeignScan> field that can be filled by FDWs
+ is fdw_scan_tlist>, which describes the tuples returned by
+ the FDW for this plan node. For simple foreign table scans this can be
+ set to NIL>, implying that the returned tuples have the
+ rowtype declared for the foreign table. A non-NIL value must be a
+ targetlist (list of TargetEntry>s) containing Vars and/or
+ expressions representing the returned columns. This might be used, for
+ example, to show that the FDW has omitted some columns that it noticed
+ won't be needed for the query. Also, if the FDW can compute expressions
+ used by the query more cheaply than can be done locally, it could add
+ those expressions to fdw_scan_tlist>. Note that join
+ plans (created from paths made by GetForeignJoinPaths>) must
+ always supply fdw_scan_tlist> to describe the set of
+ columns they will return.
+
+
The FDW should always construct at least one path that depends only on
the table's restriction clauses. In join queries, it might also choose
@@ -1019,6 +1067,18 @@ GetForeignServerByName(const char *name, bool missing_ok);
same as for an ordinary restriction clause.
+
+ If an FDW supports remote joins, GetForeignJoinPaths> should
+ produce ForeignPath>s for potential remote joins in much
+ the same way as GetForeignPaths> works for base tables.
+ Information about the intended join can be passed forward
+ to GetForeignPlan> in the same ways described above.
+ However, baserestrictinfo> is not relevant for join
+ relations; instead, the relevant join clauses for a particular join are
+ passed to GetForeignJoinPaths> as a separate parameter
+ (extra->restrictlist>).
+
+
When planning an UPDATE> or DELETE>,
PlanForeignModify> can look up the RelOptInfo>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c5452e3cb6..eeb8f19017 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -736,11 +736,11 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
break;
case T_ForeignScan:
*rels_used = bms_add_members(*rels_used,
- ((ForeignScan *) plan)->fdw_relids);
+ ((ForeignScan *) plan)->fs_relids);
break;
case T_CustomScan:
*rels_used = bms_add_members(*rels_used,
- ((CustomScan *) plan)->custom_relids);
+ ((CustomScan *) plan)->custom_relids);
break;
case T_ModifyTable:
*rels_used = bms_add_member(*rels_used,
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index fa475014f1..a96e826ba4 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -246,19 +246,18 @@ void
ExecAssignScanProjectionInfo(ScanState *node)
{
Scan *scan = (Scan *) node->ps.plan;
- Index varno;
- /* Vars in an index-only scan's tlist should be INDEX_VAR */
- if (IsA(scan, IndexOnlyScan))
- varno = INDEX_VAR;
- /* Also foreign or custom scan on pseudo relation should be INDEX_VAR */
- else if (scan->scanrelid == 0)
- {
- Assert(IsA(scan, ForeignScan) || IsA(scan, CustomScan));
- varno = INDEX_VAR;
- }
- else
- varno = scan->scanrelid;
+ ExecAssignScanProjectionInfoWithVarno(node, scan->scanrelid);
+}
+
+/*
+ * ExecAssignScanProjectionInfoWithVarno
+ * As above, but caller can specify varno expected in Vars in the tlist.
+ */
+void
+ExecAssignScanProjectionInfoWithVarno(ScanState *node, Index varno)
+{
+ Scan *scan = (Scan *) node->ps.plan;
if (tlist_matches_tupdesc(&node->ps,
scan->plan.targetlist,
diff --git a/src/backend/executor/nodeCustom.c b/src/backend/executor/nodeCustom.c
index db1b4f2ffa..0a022dff94 100644
--- a/src/backend/executor/nodeCustom.c
+++ b/src/backend/executor/nodeCustom.c
@@ -22,13 +22,24 @@
CustomScanState *
ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
{
- CustomScanState *css;
- Index scan_relid = cscan->scan.scanrelid;
+ CustomScanState *css;
+ Relation scan_rel = NULL;
+ Index scanrelid = cscan->scan.scanrelid;
+ Index tlistvarno;
- /* populate a CustomScanState according to the CustomScan */
+ /*
+ * Allocate the CustomScanState object. We let the custom scan provider
+ * do the palloc, in case it wants to make a larger object that embeds
+ * CustomScanState as the first field. It must set the node tag and the
+ * methods field correctly at this time. Other standard fields should be
+ * set to zero.
+ */
css = (CustomScanState *) cscan->methods->CreateCustomScanState(cscan);
Assert(IsA(css, CustomScanState));
+ /* ensure flags is filled correctly */
+ css->flags = cscan->flags;
+
/* fill up fields of ScanState */
css->ss.ps.plan = &cscan->scan.plan;
css->ss.ps.state = estate;
@@ -36,6 +47,8 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
/* create expression context for node */
ExecAssignExprContext(estate, &css->ss.ps);
+ css->ss.ps.ps_TupFromTlist = false;
+
/* initialize child expressions */
css->ss.ps.targetlist = (List *)
ExecInitExpr((Expr *) cscan->scan.plan.targetlist,
@@ -49,32 +62,40 @@ ExecInitCustomScan(CustomScan *cscan, EState *estate, int eflags)
ExecInitResultTupleSlot(estate, &css->ss.ps);
/*
- * open the base relation and acquire an appropriate lock on it;
- * also, get and assign the scan type
+ * open the base relation, if any, and acquire an appropriate lock on it
*/
- if (scan_relid > 0)
+ if (scanrelid > 0)
{
- Relation scan_rel;
-
- scan_rel = ExecOpenScanRelation(estate, scan_relid, eflags);
+ scan_rel = ExecOpenScanRelation(estate, scanrelid, eflags);
css->ss.ss_currentRelation = scan_rel;
- css->ss.ss_currentScanDesc = NULL; /* set by provider */
- ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel));
}
- else
+
+ /*
+ * Determine the scan tuple type. If the custom scan provider provided a
+ * targetlist describing the scan tuples, use that; else use base
+ * relation's rowtype.
+ */
+ if (cscan->custom_scan_tlist != NIL || scan_rel == NULL)
{
- TupleDesc ps_tupdesc;
+ TupleDesc scan_tupdesc;
- ps_tupdesc = ExecCleanTypeFromTL(cscan->custom_ps_tlist, false);
- ExecAssignScanType(&css->ss, ps_tupdesc);
+ scan_tupdesc = ExecTypeFromTL(cscan->custom_scan_tlist, false);
+ ExecAssignScanType(&css->ss, scan_tupdesc);
+ /* Node's targetlist will contain Vars with varno = INDEX_VAR */
+ tlistvarno = INDEX_VAR;
+ }
+ else
+ {
+ ExecAssignScanType(&css->ss, RelationGetDescr(scan_rel));
+ /* Node's targetlist will contain Vars with varno = scanrelid */
+ tlistvarno = scanrelid;
}
- css->ss.ps.ps_TupFromTlist = false;
/*
* Initialize result tuple type and projection info.
*/
ExecAssignResultTypeFromTL(&css->ss.ps);
- ExecAssignScanProjectionInfo(&css->ss);
+ ExecAssignScanProjectionInfoWithVarno(&css->ss, tlistvarno);
/*
* The callback of custom-scan provider applies the final initialization
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index fa553ace5d..bb28a7372d 100644
--- a/src/backend/executor/nodeForeignscan.c
+++ b/src/backend/executor/nodeForeignscan.c
@@ -102,7 +102,9 @@ ForeignScanState *
ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
{
ForeignScanState *scanstate;
+ Relation currentRelation = NULL;
Index scanrelid = node->scan.scanrelid;
+ Index tlistvarno;
FdwRoutine *fdwroutine;
/* check for unsupported flags */
@@ -141,40 +143,55 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags)
ExecInitScanTupleSlot(estate, &scanstate->ss);
/*
- * open the base relation and acquire an appropriate lock on it;
- * also, get and assign the scan type
+ * open the base relation, if any, and acquire an appropriate lock on it;
+ * also acquire function pointers from the FDW's handler
*/
if (scanrelid > 0)
{
- Relation currentRelation;
-
currentRelation = ExecOpenScanRelation(estate, scanrelid, eflags);
scanstate->ss.ss_currentRelation = currentRelation;
- ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
+ fdwroutine = GetFdwRoutineForRelation(currentRelation, true);
}
else
{
- TupleDesc ps_tupdesc;
+ /* We can't use the relcache, so get fdwroutine the hard way */
+ fdwroutine = GetFdwRoutineByServerId(node->fs_server);
+ }
- ps_tupdesc = ExecCleanTypeFromTL(node->fdw_ps_tlist, false);
- ExecAssignScanType(&scanstate->ss, ps_tupdesc);
+ /*
+ * Determine the scan tuple type. If the FDW provided a targetlist
+ * describing the scan tuples, use that; else use base relation's rowtype.
+ */
+ if (node->fdw_scan_tlist != NIL || currentRelation == NULL)
+ {
+ TupleDesc scan_tupdesc;
+
+ scan_tupdesc = ExecTypeFromTL(node->fdw_scan_tlist, false);
+ ExecAssignScanType(&scanstate->ss, scan_tupdesc);
+ /* Node's targetlist will contain Vars with varno = INDEX_VAR */
+ tlistvarno = INDEX_VAR;
+ }
+ else
+ {
+ ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
+ /* Node's targetlist will contain Vars with varno = scanrelid */
+ tlistvarno = scanrelid;
}
/*
* Initialize result tuple type and projection info.
*/
ExecAssignResultTypeFromTL(&scanstate->ss.ps);
- ExecAssignScanProjectionInfo(&scanstate->ss);
+ ExecAssignScanProjectionInfoWithVarno(&scanstate->ss, tlistvarno);
/*
- * Acquire function pointers from the FDW's handler, and init fdw_state.
+ * Initialize FDW-related state.
*/
- fdwroutine = GetFdwRoutine(node->fdw_handler);
scanstate->fdwroutine = fdwroutine;
scanstate->fdw_state = NULL;
/*
- * Tell the FDW to initiate the scan.
+ * Tell the FDW to initialize the scan.
*/
fdwroutine->BeginForeignScan(scanstate, eflags);
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 06b7c3c457..61bd644ab7 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -442,10 +442,12 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
ExecAssignScanType(&indexstate->ss, tupDesc);
/*
- * Initialize result tuple type and projection info.
+ * Initialize result tuple type and projection info. The node's
+ * targetlist will contain Vars with varno = INDEX_VAR, referencing the
+ * scan tuple.
*/
ExecAssignResultTypeFromTL(&indexstate->ss.ps);
- ExecAssignScanProjectionInfo(&indexstate->ss);
+ ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);
/*
* If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c
index cdbd550fd4..763ee7c9bc 100644
--- a/src/backend/foreign/foreign.c
+++ b/src/backend/foreign/foreign.c
@@ -304,21 +304,16 @@ GetFdwRoutine(Oid fdwhandler)
/*
- * GetFdwHandlerByRelId - look up the handler of the foreign-data wrapper
- * for the given foreign table
+ * GetForeignServerIdByRelId - look up the foreign server
+ * for the given foreign table, and return its OID.
*/
Oid
-GetFdwHandlerByRelId(Oid relid)
+GetForeignServerIdByRelId(Oid relid)
{
HeapTuple tp;
- Form_pg_foreign_data_wrapper fdwform;
- Form_pg_foreign_server serverform;
Form_pg_foreign_table tableform;
Oid serverid;
- Oid fdwid;
- Oid fdwhandler;
- /* Get server OID for the foreign table. */
tp = SearchSysCache1(FOREIGNTABLEREL, ObjectIdGetDatum(relid));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for foreign table %u", relid);
@@ -326,6 +321,23 @@ GetFdwHandlerByRelId(Oid relid)
serverid = tableform->ftserver;
ReleaseSysCache(tp);
+ return serverid;
+}
+
+
+/*
+ * GetFdwRoutineByServerId - look up the handler of the foreign-data wrapper
+ * for the given foreign server, and retrieve its FdwRoutine struct.
+ */
+FdwRoutine *
+GetFdwRoutineByServerId(Oid serverid)
+{
+ HeapTuple tp;
+ Form_pg_foreign_data_wrapper fdwform;
+ Form_pg_foreign_server serverform;
+ Oid fdwid;
+ Oid fdwhandler;
+
/* Get foreign-data wrapper OID for the server. */
tp = SearchSysCache1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid));
if (!HeapTupleIsValid(tp))
@@ -350,9 +362,11 @@ GetFdwHandlerByRelId(Oid relid)
ReleaseSysCache(tp);
- return fdwhandler;
+ /* And finally, call the handler function. */
+ return GetFdwRoutine(fdwhandler);
}
+
/*
* GetFdwRoutineByRelId - look up the handler of the foreign-data wrapper
* for the given foreign table, and retrieve its FdwRoutine struct.
@@ -360,9 +374,13 @@ GetFdwHandlerByRelId(Oid relid)
FdwRoutine *
GetFdwRoutineByRelId(Oid relid)
{
- Oid fdwhandler = GetFdwHandlerByRelId(relid);
+ Oid serverid;
- return GetFdwRoutine(fdwhandler);
+ /* Get server OID for the foreign table. */
+ serverid = GetForeignServerIdByRelId(relid);
+
+ /* Now retrieve server's FdwRoutine struct. */
+ return GetFdwRoutineByServerId(serverid);
}
/*
@@ -656,7 +674,7 @@ get_foreign_data_wrapper_oid(const char *fdwname, bool missing_ok)
/*
- * get_foreign_server_oid - given a FDW name, look up the OID
+ * get_foreign_server_oid - given a server name, look up the OID
*
* If missing_ok is false, throw an error if name not found. If true, just
* return InvalidOid.
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index a3139d3eb5..ed8fa72621 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -599,11 +599,11 @@ _copyForeignScan(const ForeignScan *from)
/*
* copy remainder of node
*/
- COPY_SCALAR_FIELD(fdw_handler);
+ COPY_SCALAR_FIELD(fs_server);
COPY_NODE_FIELD(fdw_exprs);
- COPY_NODE_FIELD(fdw_ps_tlist);
COPY_NODE_FIELD(fdw_private);
- COPY_BITMAPSET_FIELD(fdw_relids);
+ COPY_NODE_FIELD(fdw_scan_tlist);
+ COPY_BITMAPSET_FIELD(fs_relids);
COPY_SCALAR_FIELD(fsSystemCol);
return newnode;
@@ -627,8 +627,8 @@ _copyCustomScan(const CustomScan *from)
*/
COPY_SCALAR_FIELD(flags);
COPY_NODE_FIELD(custom_exprs);
- COPY_NODE_FIELD(custom_ps_tlist);
COPY_NODE_FIELD(custom_private);
+ COPY_NODE_FIELD(custom_scan_tlist);
COPY_BITMAPSET_FIELD(custom_relids);
/*
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index bc891d391f..fe868b889d 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -565,11 +565,11 @@ _outForeignScan(StringInfo str, const ForeignScan *node)
_outScanInfo(str, (const Scan *) node);
- WRITE_OID_FIELD(fdw_handler);
+ WRITE_OID_FIELD(fs_server);
WRITE_NODE_FIELD(fdw_exprs);
- WRITE_NODE_FIELD(fdw_ps_tlist);
WRITE_NODE_FIELD(fdw_private);
- WRITE_BITMAPSET_FIELD(fdw_relids);
+ WRITE_NODE_FIELD(fdw_scan_tlist);
+ WRITE_BITMAPSET_FIELD(fs_relids);
WRITE_BOOL_FIELD(fsSystemCol);
}
@@ -582,8 +582,8 @@ _outCustomScan(StringInfo str, const CustomScan *node)
WRITE_UINT_FIELD(flags);
WRITE_NODE_FIELD(custom_exprs);
- WRITE_NODE_FIELD(custom_ps_tlist);
WRITE_NODE_FIELD(custom_private);
+ WRITE_NODE_FIELD(custom_scan_tlist);
WRITE_BITMAPSET_FIELD(custom_relids);
appendStringInfoString(str, " :methods ");
_outToken(str, node->methods->CustomName);
@@ -1844,6 +1844,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(subplan);
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(subplan_params);
+ WRITE_OID_FIELD(serverid);
/* we don't try to print fdwroutine or fdw_private */
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index dabef3c3c7..ba78252b8f 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -30,21 +30,13 @@ set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
- List *restrictlist, List *mergeclause_list,
- JoinType jointype, SpecialJoinInfo *sjinfo,
- Relids param_source_rels, Relids extra_lateral_rels);
+ JoinType jointype, JoinPathExtraData *extra);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
- List *restrictlist, List *mergeclause_list,
- JoinType jointype, SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels, Relids extra_lateral_rels);
+ JoinType jointype, JoinPathExtraData *extra);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
- List *restrictlist,
- JoinType jointype, SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels, Relids extra_lateral_rels);
+ JoinType jointype, JoinPathExtraData *extra);
static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
@@ -86,13 +78,16 @@ add_paths_to_joinrel(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
List *restrictlist)
{
- List *mergeclause_list = NIL;
+ JoinPathExtraData extra;
bool mergejoin_allowed = true;
- SemiAntiJoinFactors semifactors;
- Relids param_source_rels = NULL;
- Relids extra_lateral_rels = NULL;
ListCell *lc;
+ extra.restrictlist = restrictlist;
+ extra.mergeclause_list = NIL;
+ extra.sjinfo = sjinfo;
+ extra.param_source_rels = NULL;
+ extra.extra_lateral_rels = NULL;
+
/*
* Find potential mergejoin clauses. We can skip this if we are not
* interested in doing a mergejoin. However, mergejoin may be our only
@@ -100,13 +95,13 @@ add_paths_to_joinrel(PlannerInfo *root,
* it's a full join.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
- mergeclause_list = select_mergejoin_clauses(root,
- joinrel,
- outerrel,
- innerrel,
- restrictlist,
- jointype,
- &mergejoin_allowed);
+ extra.mergeclause_list = select_mergejoin_clauses(root,
+ joinrel,
+ outerrel,
+ innerrel,
+ restrictlist,
+ jointype,
+ &mergejoin_allowed);
/*
* If it's SEMI or ANTI join, compute correction factors for cost
@@ -115,7 +110,7 @@ add_paths_to_joinrel(PlannerInfo *root,
if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
compute_semi_anti_join_factors(root, outerrel, innerrel,
jointype, sjinfo, restrictlist,
- &semifactors);
+ &extra.semifactors);
/*
* Decide whether it's sensible to generate parameterized paths for this
@@ -142,16 +137,16 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (bms_overlap(joinrel->relids, sjinfo->min_righthand) &&
!bms_overlap(joinrel->relids, sjinfo->min_lefthand))
- param_source_rels = bms_join(param_source_rels,
- bms_difference(root->all_baserels,
+ extra.param_source_rels = bms_join(extra.param_source_rels,
+ bms_difference(root->all_baserels,
sjinfo->min_righthand));
/* full joins constrain both sides symmetrically */
if (sjinfo->jointype == JOIN_FULL &&
bms_overlap(joinrel->relids, sjinfo->min_lefthand) &&
!bms_overlap(joinrel->relids, sjinfo->min_righthand))
- param_source_rels = bms_join(param_source_rels,
- bms_difference(root->all_baserels,
+ extra.param_source_rels = bms_join(extra.param_source_rels,
+ bms_difference(root->all_baserels,
sjinfo->min_lefthand));
}
@@ -168,9 +163,9 @@ add_paths_to_joinrel(PlannerInfo *root,
LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(lc);
if (bms_is_subset(ljinfo->lateral_rhs, joinrel->relids))
- param_source_rels = bms_join(param_source_rels,
- bms_difference(ljinfo->lateral_lhs,
- joinrel->relids));
+ extra.param_source_rels = bms_join(extra.param_source_rels,
+ bms_difference(ljinfo->lateral_lhs,
+ joinrel->relids));
}
/*
@@ -195,8 +190,8 @@ add_paths_to_joinrel(PlannerInfo *root,
!bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
{
/* Yes, remember its lateral rels */
- extra_lateral_rels = bms_add_members(extra_lateral_rels,
- phinfo->ph_lateral);
+ extra.extra_lateral_rels = bms_add_members(extra.extra_lateral_rels,
+ phinfo->ph_lateral);
}
}
@@ -206,9 +201,10 @@ add_paths_to_joinrel(PlannerInfo *root,
* it to required_outer below, while preserving the property that
* required_outer is exactly NULL if empty.)
*/
- extra_lateral_rels = bms_del_members(extra_lateral_rels, joinrel->relids);
- if (bms_is_empty(extra_lateral_rels))
- extra_lateral_rels = NULL;
+ extra.extra_lateral_rels = bms_del_members(extra.extra_lateral_rels,
+ joinrel->relids);
+ if (bms_is_empty(extra.extra_lateral_rels))
+ extra.extra_lateral_rels = NULL;
/*
* 1. Consider mergejoin paths where both relations must be explicitly
@@ -216,9 +212,7 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype,
- sjinfo,
- param_source_rels, extra_lateral_rels);
+ jointype, &extra);
/*
* 2. Consider paths where the outer relation need not be explicitly
@@ -229,9 +223,7 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype,
- sjinfo, &semifactors,
- param_source_rels, extra_lateral_rels);
+ jointype, &extra);
#ifdef NOT_USED
@@ -248,9 +240,7 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (mergejoin_allowed)
match_unsorted_inner(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype,
- sjinfo, &semifactors,
- param_source_rels, extra_lateral_rels);
+ jointype, &extra);
#endif
/*
@@ -260,30 +250,24 @@ add_paths_to_joinrel(PlannerInfo *root,
*/
if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, jointype,
- sjinfo, &semifactors,
- param_source_rels, extra_lateral_rels);
+ jointype, &extra);
/*
- * 5. If both inner and outer relations are managed by the same FDW,
- * give it a chance to push down joins.
+ * 5. If inner and outer relations are foreign tables (or joins) belonging
+ * to the same server, give the FDW a chance to push down joins.
*/
if (joinrel->fdwroutine &&
joinrel->fdwroutine->GetForeignJoinPaths)
joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
outerrel, innerrel,
- restrictlist, jointype, sjinfo,
- &semifactors,
- param_source_rels,
- extra_lateral_rels);
+ jointype, &extra);
+
/*
* 6. Finally, give extensions a chance to manipulate the path list.
*/
if (set_join_pathlist_hook)
set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
- restrictlist, jointype,
- sjinfo, &semifactors,
- param_source_rels, extra_lateral_rels);
+ jointype, &extra);
}
/*
@@ -294,15 +278,11 @@ add_paths_to_joinrel(PlannerInfo *root,
static void
try_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
- JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
- List *pathkeys)
+ List *pathkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra)
{
Relids required_outer;
JoinCostWorkspace workspace;
@@ -314,7 +294,7 @@ try_nestloop_path(PlannerInfo *root,
required_outer = calc_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
- !bms_overlap(required_outer, param_source_rels))
+ !bms_overlap(required_outer, extra->param_source_rels))
{
/*
* We override the param_source_rels heuristic to accept nestloop
@@ -345,7 +325,7 @@ try_nestloop_path(PlannerInfo *root,
* Independently of that, add parameterization needed for any
* PlaceHolderVars that need to be computed at the join.
*/
- required_outer = bms_add_members(required_outer, extra_lateral_rels);
+ required_outer = bms_add_members(required_outer, extra->extra_lateral_rels);
/*
* Do a precheck to quickly eliminate obviously-inferior paths. We
@@ -358,7 +338,7 @@ try_nestloop_path(PlannerInfo *root,
*/
initial_cost_nestloop(root, &workspace, jointype,
outer_path, inner_path,
- sjinfo, semifactors);
+ extra->sjinfo, &extra->semifactors);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
@@ -369,11 +349,11 @@ try_nestloop_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- sjinfo,
- semifactors,
+ extra->sjinfo,
+ &extra->semifactors,
outer_path,
inner_path,
- restrict_clauses,
+ extra->restrictlist,
pathkeys,
required_outer));
}
@@ -392,17 +372,14 @@ try_nestloop_path(PlannerInfo *root,
static void
try_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
- JoinType jointype,
- SpecialJoinInfo *sjinfo,
- Relids param_source_rels,
- Relids extra_lateral_rels,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
List *pathkeys,
List *mergeclauses,
List *outersortkeys,
- List *innersortkeys)
+ List *innersortkeys,
+ JoinType jointype,
+ JoinPathExtraData *extra)
{
Relids required_outer;
JoinCostWorkspace workspace;
@@ -414,7 +391,7 @@ try_mergejoin_path(PlannerInfo *root,
required_outer = calc_non_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
- !bms_overlap(required_outer, param_source_rels))
+ !bms_overlap(required_outer, extra->param_source_rels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
@@ -425,7 +402,7 @@ try_mergejoin_path(PlannerInfo *root,
* Independently of that, add parameterization needed for any
* PlaceHolderVars that need to be computed at the join.
*/
- required_outer = bms_add_members(required_outer, extra_lateral_rels);
+ required_outer = bms_add_members(required_outer, extra->extra_lateral_rels);
/*
* If the given paths are already well enough ordered, we can skip doing
@@ -444,7 +421,7 @@ try_mergejoin_path(PlannerInfo *root,
initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
outer_path, inner_path,
outersortkeys, innersortkeys,
- sjinfo);
+ extra->sjinfo);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
@@ -455,10 +432,10 @@ try_mergejoin_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- sjinfo,
+ extra->sjinfo,
outer_path,
inner_path,
- restrict_clauses,
+ extra->restrictlist,
pathkeys,
required_outer,
mergeclauses,
@@ -480,15 +457,11 @@ try_mergejoin_path(PlannerInfo *root,
static void
try_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
- JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels,
Path *outer_path,
Path *inner_path,
- List *restrict_clauses,
- List *hashclauses)
+ List *hashclauses,
+ JoinType jointype,
+ JoinPathExtraData *extra)
{
Relids required_outer;
JoinCostWorkspace workspace;
@@ -500,7 +473,7 @@ try_hashjoin_path(PlannerInfo *root,
required_outer = calc_non_nestloop_required_outer(outer_path,
inner_path);
if (required_outer &&
- !bms_overlap(required_outer, param_source_rels))
+ !bms_overlap(required_outer, extra->param_source_rels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
@@ -511,7 +484,7 @@ try_hashjoin_path(PlannerInfo *root,
* Independently of that, add parameterization needed for any
* PlaceHolderVars that need to be computed at the join.
*/
- required_outer = bms_add_members(required_outer, extra_lateral_rels);
+ required_outer = bms_add_members(required_outer, extra->extra_lateral_rels);
/*
* See comments in try_nestloop_path(). Also note that hashjoin paths
@@ -519,7 +492,7 @@ try_hashjoin_path(PlannerInfo *root,
*/
initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
outer_path, inner_path,
- sjinfo, semifactors);
+ extra->sjinfo, &extra->semifactors);
if (add_path_precheck(joinrel,
workspace.startup_cost, workspace.total_cost,
@@ -530,11 +503,11 @@ try_hashjoin_path(PlannerInfo *root,
joinrel,
jointype,
&workspace,
- sjinfo,
- semifactors,
+ extra->sjinfo,
+ &extra->semifactors,
outer_path,
inner_path,
- restrict_clauses,
+ extra->restrictlist,
required_outer,
hashclauses));
}
@@ -584,26 +557,16 @@ clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
- * 'restrictlist' contains all of the RestrictInfo nodes for restriction
- * clauses that apply to this join
- * 'mergeclause_list' is a list of RestrictInfo nodes for available
- * mergejoin clauses in this join
* 'jointype' is the type of join to do
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'param_source_rels' are OK targets for parameterization of result paths
- * 'extra_lateral_rels' are additional parameterization for result paths
+ * 'extra' contains additional input values
*/
static void
sort_inner_and_outer(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
- List *restrictlist,
- List *mergeclause_list,
JoinType jointype,
- SpecialJoinInfo *sjinfo,
- Relids param_source_rels,
- Relids extra_lateral_rels)
+ JoinPathExtraData *extra)
{
Path *outer_path;
Path *inner_path;
@@ -643,14 +606,14 @@ sort_inner_and_outer(PlannerInfo *root,
if (jointype == JOIN_UNIQUE_OUTER)
{
outer_path = (Path *) create_unique_path(root, outerrel,
- outer_path, sjinfo);
+ outer_path, extra->sjinfo);
Assert(outer_path);
jointype = JOIN_INNER;
}
else if (jointype == JOIN_UNIQUE_INNER)
{
inner_path = (Path *) create_unique_path(root, innerrel,
- inner_path, sjinfo);
+ inner_path, extra->sjinfo);
Assert(inner_path);
jointype = JOIN_INNER;
}
@@ -684,7 +647,7 @@ sort_inner_and_outer(PlannerInfo *root,
* exactly as-is as well as making variants.
*/
all_pathkeys = select_outer_pathkeys_for_merge(root,
- mergeclause_list,
+ extra->mergeclause_list,
joinrel);
foreach(l, all_pathkeys)
@@ -707,10 +670,10 @@ sort_inner_and_outer(PlannerInfo *root,
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
outerkeys,
true,
- mergeclause_list);
+ extra->mergeclause_list);
/* Should have used them all... */
- Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
+ Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
/* Build sort pathkeys for the inner side */
innerkeys = make_inner_pathkeys_for_merge(root,
@@ -730,17 +693,14 @@ sort_inner_and_outer(PlannerInfo *root,
*/
try_mergejoin_path(root,
joinrel,
- jointype,
- sjinfo,
- param_source_rels,
- extra_lateral_rels,
outer_path,
inner_path,
- restrictlist,
merge_pathkeys,
cur_mergeclauses,
outerkeys,
- innerkeys);
+ innerkeys,
+ jointype,
+ extra);
}
}
@@ -771,28 +731,16 @@ sort_inner_and_outer(PlannerInfo *root,
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
- * 'restrictlist' contains all of the RestrictInfo nodes for restriction
- * clauses that apply to this join
- * 'mergeclause_list' is a list of RestrictInfo nodes for available
- * mergejoin clauses in this join
* 'jointype' is the type of join to do
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
- * 'param_source_rels' are OK targets for parameterization of result paths
- * 'extra_lateral_rels' are additional parameterization for result paths
+ * 'extra' contains additional input values
*/
static void
match_unsorted_outer(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
- List *restrictlist,
- List *mergeclause_list,
JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels)
+ JoinPathExtraData *extra)
{
JoinType save_jointype = jointype;
bool nestjoinOK;
@@ -854,7 +802,7 @@ match_unsorted_outer(PlannerInfo *root,
if (inner_cheapest_total == NULL)
return;
inner_cheapest_total = (Path *)
- create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
+ create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
Assert(inner_cheapest_total);
}
else if (nestjoinOK)
@@ -898,7 +846,7 @@ match_unsorted_outer(PlannerInfo *root,
if (outerpath != outerrel->cheapest_total_path)
continue;
outerpath = (Path *) create_unique_path(root, outerrel,
- outerpath, sjinfo);
+ outerpath, extra->sjinfo);
Assert(outerpath);
}
@@ -918,15 +866,11 @@ match_unsorted_outer(PlannerInfo *root,
*/
try_nestloop_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
outerpath,
inner_cheapest_total,
- restrictlist,
- merge_pathkeys);
+ merge_pathkeys,
+ jointype,
+ extra);
}
else if (nestjoinOK)
{
@@ -944,30 +888,22 @@ match_unsorted_outer(PlannerInfo *root,
try_nestloop_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
outerpath,
innerpath,
- restrictlist,
- merge_pathkeys);
+ merge_pathkeys,
+ jointype,
+ extra);
}
/* Also consider materialized form of the cheapest inner path */
if (matpath != NULL)
try_nestloop_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
outerpath,
matpath,
- restrictlist,
- merge_pathkeys);
+ merge_pathkeys,
+ jointype,
+ extra);
}
/* Can't do anything else if outer path needs to be unique'd */
@@ -982,7 +918,7 @@ match_unsorted_outer(PlannerInfo *root,
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
- mergeclause_list);
+ extra->mergeclause_list);
/*
* Done with this outer path if no chance for a mergejoin.
@@ -1000,7 +936,7 @@ match_unsorted_outer(PlannerInfo *root,
else
continue;
}
- if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
+ if (useallclauses && list_length(mergeclauses) != list_length(extra->mergeclause_list))
continue;
/* Compute the required ordering of the inner path */
@@ -1016,17 +952,14 @@ match_unsorted_outer(PlannerInfo *root,
*/
try_mergejoin_path(root,
joinrel,
- jointype,
- sjinfo,
- param_source_rels,
- extra_lateral_rels,
outerpath,
inner_cheapest_total,
- restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
- innersortkeys);
+ innersortkeys,
+ jointype,
+ extra);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
@@ -1115,17 +1048,14 @@ match_unsorted_outer(PlannerInfo *root,
newclauses = mergeclauses;
try_mergejoin_path(root,
joinrel,
- jointype,
- sjinfo,
- param_source_rels,
- extra_lateral_rels,
outerpath,
innerpath,
- restrictlist,
merge_pathkeys,
newclauses,
NIL,
- NIL);
+ NIL,
+ jointype,
+ extra);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
@@ -1161,17 +1091,14 @@ match_unsorted_outer(PlannerInfo *root,
}
try_mergejoin_path(root,
joinrel,
- jointype,
- sjinfo,
- param_source_rels,
- extra_lateral_rels,
outerpath,
innerpath,
- restrictlist,
merge_pathkeys,
newclauses,
NIL,
- NIL);
+ NIL,
+ jointype,
+ extra);
}
cheapest_startup_inner = innerpath;
}
@@ -1193,25 +1120,16 @@ match_unsorted_outer(PlannerInfo *root,
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
- * 'restrictlist' contains all of the RestrictInfo nodes for restriction
- * clauses that apply to this join
* 'jointype' is the type of join to do
- * 'sjinfo' is extra info about the join for selectivity estimation
- * 'semifactors' contains valid data if jointype is SEMI or ANTI
- * 'param_source_rels' are OK targets for parameterization of result paths
- * 'extra_lateral_rels' are additional parameterization for result paths
+ * 'extra' contains additional input values
*/
static void
hash_inner_and_outer(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
- List *restrictlist,
JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels)
+ JoinPathExtraData *extra)
{
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
@@ -1225,7 +1143,7 @@ hash_inner_and_outer(PlannerInfo *root,
* usable with this pair of sub-relations.
*/
hashclauses = NIL;
- foreach(l, restrictlist)
+ foreach(l, extra->restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
@@ -1276,53 +1194,41 @@ hash_inner_and_outer(PlannerInfo *root,
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel,
- cheapest_total_outer, sjinfo);
+ cheapest_total_outer, extra->sjinfo);
Assert(cheapest_total_outer);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
cheapest_total_outer,
cheapest_total_inner,
- restrictlist,
- hashclauses);
+ hashclauses,
+ jointype,
+ extra);
/* no possibility of cheap startup here */
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner = (Path *)
create_unique_path(root, innerrel,
- cheapest_total_inner, sjinfo);
+ cheapest_total_inner, extra->sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
try_hashjoin_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
cheapest_total_outer,
cheapest_total_inner,
- restrictlist,
- hashclauses);
+ hashclauses,
+ jointype,
+ extra);
if (cheapest_startup_outer != NULL &&
cheapest_startup_outer != cheapest_total_outer)
try_hashjoin_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
cheapest_startup_outer,
cheapest_total_inner,
- restrictlist,
- hashclauses);
+ hashclauses,
+ jointype,
+ extra);
}
else
{
@@ -1339,15 +1245,11 @@ hash_inner_and_outer(PlannerInfo *root,
if (cheapest_startup_outer != NULL)
try_hashjoin_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
cheapest_startup_outer,
cheapest_total_inner,
- restrictlist,
- hashclauses);
+ hashclauses,
+ jointype,
+ extra);
foreach(lc1, outerrel->cheapest_parameterized_paths)
{
@@ -1377,15 +1279,11 @@ hash_inner_and_outer(PlannerInfo *root,
try_hashjoin_path(root,
joinrel,
- jointype,
- sjinfo,
- semifactors,
- param_source_rels,
- extra_lateral_rels,
outerpath,
innerpath,
- restrictlist,
- hashclauses);
+ hashclauses,
+ jointype,
+ extra);
}
}
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 3246332d6e..c809237283 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -44,6 +44,7 @@
#include "utils/lsyscache.h"
+static Plan *create_plan_recurse(PlannerInfo *root, Path *best_path);
static Plan *create_scan_plan(PlannerInfo *root, Path *best_path);
static List *build_path_tlist(PlannerInfo *root, Path *path);
static bool use_physical_tlist(PlannerInfo *root, RelOptInfo *rel);
@@ -219,7 +220,7 @@ create_plan(PlannerInfo *root, Path *best_path)
* create_plan_recurse
* Recursive guts of create_plan().
*/
-Plan *
+static Plan *
create_plan_recurse(PlannerInfo *root, Path *best_path)
{
Plan *plan;
@@ -1950,7 +1951,7 @@ create_worktablescan_plan(PlannerInfo *root, Path *best_path,
/*
* create_foreignscan_plan
- * Returns a foreignscan plan for the base relation scanned by 'best_path'
+ * Returns a foreignscan plan for the relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
*/
static ForeignScan *
@@ -1965,9 +1966,11 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
ListCell *lc;
int i;
+ Assert(rel->fdwroutine != NULL);
+
/*
- * If we're scanning a base relation, look up the OID.
- * (We can skip this if scanning a join relation.)
+ * If we're scanning a base relation, fetch its OID. (Irrelevant if
+ * scanning a join relation.)
*/
if (scan_relid > 0)
{
@@ -1978,7 +1981,6 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
Assert(rte->rtekind == RTE_RELATION);
rel_oid = rte->relid;
}
- Assert(rel->fdwroutine != NULL);
/*
* Sort clauses into best execution order. We do this first since the FDW
@@ -1996,42 +1998,22 @@ create_foreignscan_plan(PlannerInfo *root, ForeignPath *best_path,
scan_plan = rel->fdwroutine->GetForeignPlan(root, rel, rel_oid,
best_path,
tlist, scan_clauses);
- /*
- * Sanity check. There may be resjunk entries in fdw_ps_tlist that
- * are included only to help EXPLAIN deparse plans properly. We require
- * that these are at the end, so that when the executor builds the scan
- * descriptor based on the non-junk entries, it gets the attribute
- * numbers correct.
- */
- if (scan_plan->scan.scanrelid == 0)
- {
- bool found_resjunk = false;
-
- foreach (lc, scan_plan->fdw_ps_tlist)
- {
- TargetEntry *tle = lfirst(lc);
-
- if (tle->resjunk)
- found_resjunk = true;
- else if (found_resjunk)
- elog(ERROR, "junk TLE should not apper prior to valid one");
- }
- }
- /* Set the relids that are represented by this foreign scan for Explain */
- scan_plan->fdw_relids = best_path->path.parent->relids;
/* Copy cost data from Path to Plan; no need to make FDW do this */
copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
- /* Track FDW server-id; no need to make FDW do this */
- scan_plan->fdw_handler = rel->fdw_handler;
+ /* Copy foreign server OID; likewise, no need to make FDW do this */
+ scan_plan->fs_server = rel->serverid;
+
+ /* Likewise, copy the relids that are represented by this foreign scan */
+ scan_plan->fs_relids = best_path->path.parent->relids;
/*
* Replace any outer-relation variables with nestloop params in the qual
* and fdw_exprs expressions. We do this last so that the FDW doesn't
* have to be involved. (Note that parts of fdw_exprs could have come
* from join clauses, so doing this beforehand on the scan_clauses
- * wouldn't work.)
+ * wouldn't work.) We assume fdw_scan_tlist contains no such variables.
*/
if (best_path->path.param_info)
{
@@ -2087,7 +2069,6 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
{
CustomScan *cplan;
RelOptInfo *rel = best_path->path.parent;
- ListCell *lc;
/*
* Sort clauses into the best execution order, although custom-scan
@@ -2106,42 +2087,22 @@ create_customscan_plan(PlannerInfo *root, CustomPath *best_path,
scan_clauses);
Assert(IsA(cplan, CustomScan));
- /*
- * Sanity check. There may be resjunk entries in custom_ps_tlist that
- * are included only to help EXPLAIN deparse plans properly. We require
- * that these are at the end, so that when the executor builds the scan
- * descriptor based on the non-junk entries, it gets the attribute
- * numbers correct.
- */
- if (cplan->scan.scanrelid == 0)
- {
- bool found_resjunk = false;
-
- foreach (lc, cplan->custom_ps_tlist)
- {
- TargetEntry *tle = lfirst(lc);
-
- if (tle->resjunk)
- found_resjunk = true;
- else if (found_resjunk)
- elog(ERROR, "junk TLE should not apper prior to valid one");
- }
- }
- /* Set the relids that are represented by this custom scan for Explain */
- cplan->custom_relids = best_path->path.parent->relids;
-
/*
* Copy cost data from Path to Plan; no need to make custom-plan providers
* do this
*/
copy_path_costsize(&cplan->scan.plan, &best_path->path);
+ /* Likewise, copy the relids that are represented by this custom scan */
+ cplan->custom_relids = best_path->path.parent->relids;
+
/*
* Replace any outer-relation variables with nestloop params in the qual
* and custom_exprs expressions. We do this last so that the custom-plan
* provider doesn't have to be involved. (Note that parts of custom_exprs
* could have come from join clauses, so doing this beforehand on the
- * scan_clauses wouldn't work.)
+ * scan_clauses wouldn't work.) We assume custom_scan_tlist contains no
+ * such variables.
*/
if (best_path->path.param_info)
{
@@ -3611,7 +3572,8 @@ make_foreignscan(List *qptlist,
List *qpqual,
Index scanrelid,
List *fdw_exprs,
- List *fdw_private)
+ List *fdw_private,
+ List *fdw_scan_tlist)
{
ForeignScan *node = makeNode(ForeignScan);
Plan *plan = &node->scan.plan;
@@ -3622,8 +3584,13 @@ make_foreignscan(List *qptlist,
plan->lefttree = NULL;
plan->righttree = NULL;
node->scan.scanrelid = scanrelid;
+ /* fs_server will be filled in by create_foreignscan_plan */
+ node->fs_server = InvalidOid;
node->fdw_exprs = fdw_exprs;
node->fdw_private = fdw_private;
+ node->fdw_scan_tlist = fdw_scan_tlist;
+ /* fs_relids will be filled in by create_foreignscan_plan */
+ node->fs_relids = NULL;
/* fsSystemCol will be filled in by create_foreignscan_plan */
node->fsSystemCol = false;
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 612d32571a..fac51c9147 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -86,12 +86,6 @@ static void flatten_unplanned_rtes(PlannerGlobal *glob, RangeTblEntry *rte);
static bool flatten_rtes_walker(Node *node, PlannerGlobal *glob);
static void add_rte_to_flat_rtable(PlannerGlobal *glob, RangeTblEntry *rte);
static Plan *set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset);
-static void set_foreignscan_references(PlannerInfo *root,
- ForeignScan *fscan,
- int rtoffset);
-static void set_customscan_references(PlannerInfo *root,
- CustomScan *cscan,
- int rtoffset);
static Plan *set_indexonlyscan_references(PlannerInfo *root,
IndexOnlyScan *plan,
int rtoffset);
@@ -99,6 +93,12 @@ static Plan *set_subqueryscan_references(PlannerInfo *root,
SubqueryScan *plan,
int rtoffset);
static bool trivial_subqueryscan(SubqueryScan *plan);
+static void set_foreignscan_references(PlannerInfo *root,
+ ForeignScan *fscan,
+ int rtoffset);
+static void set_customscan_references(PlannerInfo *root,
+ CustomScan *cscan,
+ int rtoffset);
static Node *fix_scan_expr(PlannerInfo *root, Node *node, int rtoffset);
static Node *fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context);
static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context);
@@ -573,7 +573,6 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
case T_ForeignScan:
set_foreignscan_references(root, (ForeignScan *) plan, rtoffset);
break;
-
case T_CustomScan:
set_customscan_references(root, (CustomScan *) plan, rtoffset);
break;
@@ -890,121 +889,6 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
return plan;
}
-/*
- * set_foreignscan_references
- * Do set_plan_references processing on an ForeignScan
- */
-static void
-set_foreignscan_references(PlannerInfo *root,
- ForeignScan *fscan,
- int rtoffset)
-{
- if (rtoffset > 0)
- {
- Bitmapset *tempset = NULL;
- int x = -1;
-
- while ((x = bms_next_member(fscan->fdw_relids, x)) >= 0)
- tempset = bms_add_member(tempset, x + rtoffset);
- fscan->fdw_relids = tempset;
- }
-
- if (fscan->scan.scanrelid == 0)
- {
- indexed_tlist *pscan_itlist = build_tlist_index(fscan->fdw_ps_tlist);
-
- fscan->scan.plan.targetlist = (List *)
- fix_upper_expr(root,
- (Node *) fscan->scan.plan.targetlist,
- pscan_itlist,
- INDEX_VAR,
- rtoffset);
- fscan->scan.plan.qual = (List *)
- fix_upper_expr(root,
- (Node *) fscan->scan.plan.qual,
- pscan_itlist,
- INDEX_VAR,
- rtoffset);
- fscan->fdw_exprs = (List *)
- fix_upper_expr(root,
- (Node *) fscan->fdw_exprs,
- pscan_itlist,
- INDEX_VAR,
- rtoffset);
- fscan->fdw_ps_tlist =
- fix_scan_list(root, fscan->fdw_ps_tlist, rtoffset);
- pfree(pscan_itlist);
- }
- else
- {
- fscan->scan.scanrelid += rtoffset;
- fscan->scan.plan.targetlist =
- fix_scan_list(root, fscan->scan.plan.targetlist, rtoffset);
- fscan->scan.plan.qual =
- fix_scan_list(root, fscan->scan.plan.qual, rtoffset);
- fscan->fdw_exprs =
- fix_scan_list(root, fscan->fdw_exprs, rtoffset);
- }
-}
-
-/*
- * set_customscan_references
- * Do set_plan_references processing on an CustomScan
- */
-static void
-set_customscan_references(PlannerInfo *root,
- CustomScan *cscan,
- int rtoffset)
-{
- if (rtoffset > 0)
- {
- Bitmapset *tempset = NULL;
- int x = -1;
-
- while ((x = bms_next_member(cscan->custom_relids, x)) >= 0)
- tempset = bms_add_member(tempset, x + rtoffset);
- cscan->custom_relids = tempset;
- }
-
- if (cscan->scan.scanrelid == 0)
- {
- indexed_tlist *pscan_itlist =
- build_tlist_index(cscan->custom_ps_tlist);
-
- cscan->scan.plan.targetlist = (List *)
- fix_upper_expr(root,
- (Node *) cscan->scan.plan.targetlist,
- pscan_itlist,
- INDEX_VAR,
- rtoffset);
- cscan->scan.plan.qual = (List *)
- fix_upper_expr(root,
- (Node *) cscan->scan.plan.qual,
- pscan_itlist,
- INDEX_VAR,
- rtoffset);
- cscan->custom_exprs = (List *)
- fix_upper_expr(root,
- (Node *) cscan->custom_exprs,
- pscan_itlist,
- INDEX_VAR,
- rtoffset);
- cscan->custom_ps_tlist =
- fix_scan_list(root, cscan->custom_ps_tlist, rtoffset);
- pfree(pscan_itlist);
- }
- else
- {
- cscan->scan.scanrelid += rtoffset;
- cscan->scan.plan.targetlist =
- fix_scan_list(root, cscan->scan.plan.targetlist, rtoffset);
- cscan->scan.plan.qual =
- fix_scan_list(root, cscan->scan.plan.qual, rtoffset);
- cscan->custom_exprs =
- fix_scan_list(root, cscan->custom_exprs, rtoffset);
- }
-}
-
/*
* set_indexonlyscan_references
* Do set_plan_references processing on an IndexOnlyScan
@@ -1179,6 +1063,134 @@ trivial_subqueryscan(SubqueryScan *plan)
return true;
}
+/*
+ * set_foreignscan_references
+ * Do set_plan_references processing on a ForeignScan
+ */
+static void
+set_foreignscan_references(PlannerInfo *root,
+ ForeignScan *fscan,
+ int rtoffset)
+{
+ /* Adjust scanrelid if it's valid */
+ if (fscan->scan.scanrelid > 0)
+ fscan->scan.scanrelid += rtoffset;
+
+ if (fscan->fdw_scan_tlist != NIL || fscan->scan.scanrelid == 0)
+ {
+ /* Adjust tlist, qual, fdw_exprs to reference custom scan tuple */
+ indexed_tlist *itlist = build_tlist_index(fscan->fdw_scan_tlist);
+
+ fscan->scan.plan.targetlist = (List *)
+ fix_upper_expr(root,
+ (Node *) fscan->scan.plan.targetlist,
+ itlist,
+ INDEX_VAR,
+ rtoffset);
+ fscan->scan.plan.qual = (List *)
+ fix_upper_expr(root,
+ (Node *) fscan->scan.plan.qual,
+ itlist,
+ INDEX_VAR,
+ rtoffset);
+ fscan->fdw_exprs = (List *)
+ fix_upper_expr(root,
+ (Node *) fscan->fdw_exprs,
+ itlist,
+ INDEX_VAR,
+ rtoffset);
+ pfree(itlist);
+ /* fdw_scan_tlist itself just needs fix_scan_list() adjustments */
+ fscan->fdw_scan_tlist =
+ fix_scan_list(root, fscan->fdw_scan_tlist, rtoffset);
+ }
+ else
+ {
+ /* Adjust tlist, qual, fdw_exprs in the standard way */
+ fscan->scan.plan.targetlist =
+ fix_scan_list(root, fscan->scan.plan.targetlist, rtoffset);
+ fscan->scan.plan.qual =
+ fix_scan_list(root, fscan->scan.plan.qual, rtoffset);
+ fscan->fdw_exprs =
+ fix_scan_list(root, fscan->fdw_exprs, rtoffset);
+ }
+
+ /* Adjust fs_relids if needed */
+ if (rtoffset > 0)
+ {
+ Bitmapset *tempset = NULL;
+ int x = -1;
+
+ while ((x = bms_next_member(fscan->fs_relids, x)) >= 0)
+ tempset = bms_add_member(tempset, x + rtoffset);
+ fscan->fs_relids = tempset;
+ }
+}
+
+/*
+ * set_customscan_references
+ * Do set_plan_references processing on a CustomScan
+ */
+static void
+set_customscan_references(PlannerInfo *root,
+ CustomScan *cscan,
+ int rtoffset)
+{
+ /* Adjust scanrelid if it's valid */
+ if (cscan->scan.scanrelid > 0)
+ cscan->scan.scanrelid += rtoffset;
+
+ if (cscan->custom_scan_tlist != NIL || cscan->scan.scanrelid == 0)
+ {
+ /* Adjust tlist, qual, custom_exprs to reference custom scan tuple */
+ indexed_tlist *itlist = build_tlist_index(cscan->custom_scan_tlist);
+
+ cscan->scan.plan.targetlist = (List *)
+ fix_upper_expr(root,
+ (Node *) cscan->scan.plan.targetlist,
+ itlist,
+ INDEX_VAR,
+ rtoffset);
+ cscan->scan.plan.qual = (List *)
+ fix_upper_expr(root,
+ (Node *) cscan->scan.plan.qual,
+ itlist,
+ INDEX_VAR,
+ rtoffset);
+ cscan->custom_exprs = (List *)
+ fix_upper_expr(root,
+ (Node *) cscan->custom_exprs,
+ itlist,
+ INDEX_VAR,
+ rtoffset);
+ pfree(itlist);
+ /* custom_scan_tlist itself just needs fix_scan_list() adjustments */
+ cscan->custom_scan_tlist =
+ fix_scan_list(root, cscan->custom_scan_tlist, rtoffset);
+ }
+ else
+ {
+ /* Adjust tlist, qual, custom_exprs in the standard way */
+ cscan->scan.plan.targetlist =
+ fix_scan_list(root, cscan->scan.plan.targetlist, rtoffset);
+ cscan->scan.plan.qual =
+ fix_scan_list(root, cscan->scan.plan.qual, rtoffset);
+ cscan->custom_exprs =
+ fix_scan_list(root, cscan->custom_exprs, rtoffset);
+ }
+
+ /* Adjust custom_relids if needed */
+ if (rtoffset > 0)
+ {
+ Bitmapset *tempset = NULL;
+ int x = -1;
+
+ while ((x = bms_next_member(cscan->custom_relids, x)) >= 0)
+ tempset = bms_add_member(tempset, x + rtoffset);
+ cscan->custom_relids = tempset;
+ }
+}
+
/*
* copyVar
* Copy a Var node.
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 0220672fc4..afccee53ac 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2318,12 +2318,14 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
case T_ForeignScan:
finalize_primnode((Node *) ((ForeignScan *) plan)->fdw_exprs,
&context);
+ /* We assume fdw_scan_tlist cannot contain Params */
context.paramids = bms_add_members(context.paramids, scan_params);
break;
case T_CustomScan:
finalize_primnode((Node *) ((CustomScan *) plan)->custom_exprs,
&context);
+ /* We assume custom_scan_tlist cannot contain Params */
context.paramids = bms_add_members(context.paramids, scan_params);
break;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 894e0db802..b425680f47 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -380,17 +380,18 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
rel->indexlist = indexinfos;
- /* Grab the fdwroutine info using the relcache, while we have it */
+ /* Grab foreign-table info using the relcache, while we have it */
if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
- rel->fdw_handler = GetFdwHandlerByRelId(RelationGetRelid(relation));
+ rel->serverid = GetForeignServerIdByRelId(RelationGetRelid(relation));
rel->fdwroutine = GetFdwRoutineForRelation(relation, true);
}
else
{
- rel->fdw_handler = InvalidOid;
+ rel->serverid = InvalidOid;
rel->fdwroutine = NULL;
}
+
heap_close(relation, NoLock);
/*
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 56235663d7..1d635cd6d2 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -14,7 +14,6 @@
*/
#include "postgres.h"
-#include "foreign/fdwapi.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@@ -122,8 +121,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->subplan = NULL;
rel->subroot = NULL;
rel->subplan_params = NIL;
+ rel->serverid = InvalidOid;
rel->fdwroutine = NULL;
- rel->fdw_handler = InvalidOid;
rel->fdw_private = NULL;
rel->baserestrictinfo = NIL;
rel->baserestrictcost.startup = 0;
@@ -385,6 +384,7 @@ build_join_rel(PlannerInfo *root,
joinrel->subplan = NULL;
joinrel->subroot = NULL;
joinrel->subplan_params = NIL;
+ joinrel->serverid = InvalidOid;
joinrel->fdwroutine = NULL;
joinrel->fdw_private = NULL;
joinrel->baserestrictinfo = NIL;
@@ -393,6 +393,17 @@ build_join_rel(PlannerInfo *root,
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
+ /*
+ * Set up foreign-join fields if outer and inner relation are foreign
+ * tables (or joins) belonging to the same server.
+ */
+ if (OidIsValid(outer_rel->serverid) &&
+ inner_rel->serverid == outer_rel->serverid)
+ {
+ joinrel->serverid = outer_rel->serverid;
+ joinrel->fdwroutine = outer_rel->fdwroutine;
+ }
+
/*
* Create a new tlist containing just the vars that need to be output from
* this join (ie, are needed for higher joinclauses or final output).
@@ -428,18 +439,6 @@ build_join_rel(PlannerInfo *root,
set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
sjinfo, restrictlist);
- /*
- * Set FDW handler and routine if both outer and inner relation
- * are managed by same FDW driver.
- */
- if (OidIsValid(outer_rel->fdw_handler) &&
- OidIsValid(inner_rel->fdw_handler) &&
- outer_rel->fdw_handler == inner_rel->fdw_handler)
- {
- joinrel->fdw_handler = outer_rel->fdw_handler;
- joinrel->fdwroutine = GetFdwRoutine(joinrel->fdw_handler);
- }
-
/*
* Add the joinrel to the query's joinrel list, and store it into the
* auxiliary hashtable if there is one. NB: GEQO requires us to append
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 4b3cd85ad9..156b5331f3 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -128,8 +128,8 @@ typedef struct
* varlevelsup > 0). We store the PlanState node that is the immediate
* parent of the expression to be deparsed, as well as a list of that
* PlanState's ancestors. In addition, we store its outer and inner subplan
- * state nodes, as well as their plan nodes' targetlists, and the indextlist
- * if the current PlanState is an IndexOnlyScanState. (These fields could
+ * state nodes, as well as their plan nodes' targetlists, and the index tlist
+ * if the current plan node might contain INDEX_VAR Vars. (These fields could
* be derived on-the-fly from the current PlanState, but it seems notationally
* clearer to set them up as separate fields.)
*/
@@ -2586,10 +2586,11 @@ deparse_context_for_plan_rtable(List *rtable, List *rtable_names)
* provide the parent PlanState node. Then OUTER_VAR and INNER_VAR references
* can be resolved by drilling down into the left and right child plans.
* Similarly, INDEX_VAR references can be resolved by reference to the
- * indextlist given in the parent IndexOnlyScan node. (Note that we don't
- * currently support deparsing of indexquals in regular IndexScan or
- * BitmapIndexScan nodes; for those, we can only deparse the indexqualorig
- * fields, which won't contain INDEX_VAR Vars.)
+ * indextlist given in a parent IndexOnlyScan node, or to the scan tlist in
+ * ForeignScan and CustomScan nodes. (Note that we don't currently support
+ * deparsing of indexquals in regular IndexScan or BitmapIndexScan nodes;
+ * for those, we can only deparse the indexqualorig fields, which won't
+ * contain INDEX_VAR Vars.)
*
* Note: planstate really ought to be declared as "PlanState *", but we use
* "Node *" to avoid having to include execnodes.h in ruleutils.h.
@@ -3870,13 +3871,13 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps)
else
dpns->inner_tlist = NIL;
- /* index_tlist is set only if it's an IndexOnlyScan */
+ /* Set up referent for INDEX_VAR Vars, if needed */
if (IsA(ps->plan, IndexOnlyScan))
dpns->index_tlist = ((IndexOnlyScan *) ps->plan)->indextlist;
else if (IsA(ps->plan, ForeignScan))
- dpns->index_tlist = ((ForeignScan *) ps->plan)->fdw_ps_tlist;
+ dpns->index_tlist = ((ForeignScan *) ps->plan)->fdw_scan_tlist;
else if (IsA(ps->plan, CustomScan))
- dpns->index_tlist = ((CustomScan *) ps->plan)->custom_ps_tlist;
+ dpns->index_tlist = ((CustomScan *) ps->plan)->custom_scan_tlist;
else
dpns->index_tlist = NIL;
}
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 1b68b54c7d..6c64609197 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -257,6 +257,7 @@ typedef bool (*ExecScanRecheckMtd) (ScanState *node, TupleTableSlot *slot);
extern TupleTableSlot *ExecScan(ScanState *node, ExecScanAccessMtd accessMtd,
ExecScanRecheckMtd recheckMtd);
extern void ExecAssignScanProjectionInfo(ScanState *node);
+extern void ExecAssignScanProjectionInfoWithVarno(ScanState *node, Index varno);
extern void ExecScanReScan(ScanState *node);
/*
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index c683d9259e..511c96b093 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -47,6 +47,13 @@ typedef void (*ReScanForeignScan_function) (ForeignScanState *node);
typedef void (*EndForeignScan_function) (ForeignScanState *node);
+typedef void (*GetForeignJoinPaths_function) (PlannerInfo *root,
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
+
typedef void (*AddForeignUpdateTargets_function) (Query *parsetree,
RangeTblEntry *target_rte,
Relation target_relation);
@@ -82,17 +89,6 @@ typedef void (*EndForeignModify_function) (EState *estate,
typedef int (*IsForeignRelUpdatable_function) (Relation rel);
-typedef void (*GetForeignJoinPaths_function) (PlannerInfo *root,
- RelOptInfo *joinrel,
- RelOptInfo *outerrel,
- RelOptInfo *innerrel,
- List *restrictlist,
- JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels);
-
typedef void (*ExplainForeignScan_function) (ForeignScanState *node,
struct ExplainState *es);
@@ -142,6 +138,9 @@ typedef struct FdwRoutine
* are not provided.
*/
+ /* Functions for remote-join planning */
+ GetForeignJoinPaths_function GetForeignJoinPaths;
+
/* Functions for updating foreign tables */
AddForeignUpdateTargets_function AddForeignUpdateTargets;
PlanForeignModify_function PlanForeignModify;
@@ -161,15 +160,13 @@ typedef struct FdwRoutine
/* Support functions for IMPORT FOREIGN SCHEMA */
ImportForeignSchema_function ImportForeignSchema;
-
- /* Support functions for join push-down */
- GetForeignJoinPaths_function GetForeignJoinPaths;
} FdwRoutine;
/* Functions in foreign/foreign.c */
-extern Oid GetFdwHandlerByRelId(Oid relid);
extern FdwRoutine *GetFdwRoutine(Oid fdwhandler);
+extern Oid GetForeignServerIdByRelId(Oid relid);
+extern FdwRoutine *GetFdwRoutineByServerId(Oid serverid);
extern FdwRoutine *GetFdwRoutineByRelId(Oid relid);
extern FdwRoutine *GetFdwRoutineForRelation(Relation relation, bool makecopy);
extern bool IsImportableForeignTable(const char *tablename,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index c63492fa0b..9313292222 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -479,32 +479,46 @@ typedef struct WorkTableScan
* fdw_exprs and fdw_private are both under the control of the foreign-data
* wrapper, but fdw_exprs is presumed to contain expression trees and will
* be post-processed accordingly by the planner; fdw_private won't be.
- * An optional fdw_ps_tlist is used to map a reference to an attribute of
- * underlying relation(s) onto a pair of INDEX_VAR and alternative varattno.
- * When fdw_ps_tlist is used, this represents a remote join, and the FDW
- * is responsible for setting this field to an appropriate value.
- * Note that everything in above lists must be copiable by copyObject().
+ * Note that everything in both lists must be copiable by copyObject().
* One way to store an arbitrary blob of bytes is to represent it as a bytea
* Const. Usually, though, you'll be better off choosing a representation
* that can be dumped usefully by nodeToString().
+ *
+ * fdw_scan_tlist is a targetlist describing the contents of the scan tuple
+ * returned by the FDW; it can be NIL if the scan tuple matches the declared
+ * rowtype of the foreign table, which is the normal case for a simple foreign
+ * table scan. (If the plan node represents a foreign join, fdw_scan_tlist
+ * is required since there is no rowtype available from the system catalogs.)
+ * When fdw_scan_tlist is provided, Vars in the node's tlist and quals must
+ * have varno INDEX_VAR, and their varattnos correspond to resnos in the
+ * fdw_scan_tlist (which are also column numbers in the actual scan tuple).
+ * fdw_scan_tlist is never actually executed; it just holds expression trees
+ * describing what is in the scan tuple's columns.
+ *
+ * When the plan node represents a foreign join, scan.scanrelid is zero and
+ * fs_relids must be consulted to identify the join relation. (fs_relids
+ * is valid for simple scans as well, but will always match scan.scanrelid.)
* ----------------
*/
typedef struct ForeignScan
{
Scan scan;
- Oid fdw_handler; /* OID of FDW handler */
+ Oid fs_server; /* OID of foreign server */
List *fdw_exprs; /* expressions that FDW may evaluate */
- List *fdw_ps_tlist; /* tlist, if replacing a join */
List *fdw_private; /* private data for FDW */
- Bitmapset *fdw_relids; /* RTIs generated by this scan */
+ List *fdw_scan_tlist; /* optional tlist describing scan tuple */
+ Bitmapset *fs_relids; /* RTIs generated by this scan */
bool fsSystemCol; /* true if any "system column" is needed */
} ForeignScan;
/* ----------------
* CustomScan node
*
- * The comments for ForeignScan's fdw_exprs, fdw_varmap and fdw_private fields
- * apply equally to custom_exprs, custom_ps_tlist and custom_private.
+ * The comments for ForeignScan's fdw_exprs, fdw_private, fdw_scan_tlist,
+ * and fs_relids fields apply equally to CustomScan's custom_exprs,
+ * custom_private, custom_scan_tlist, and custom_relids fields. The
+ * convention of setting scan.scanrelid to zero for joins applies as well.
+ *
* Note that since Plan trees can be copied, custom scan providers *must*
* fit all plan data they need into those fields; embedding CustomScan in
* a larger struct will not work.
@@ -528,8 +542,9 @@ typedef struct CustomScan
Scan scan;
uint32 flags; /* mask of CUSTOMPATH_* flags, see relation.h */
List *custom_exprs; /* expressions that custom code may evaluate */
- List *custom_ps_tlist;/* tlist, if replacing a join */
List *custom_private; /* private data for custom code */
+ List *custom_scan_tlist; /* optional tlist describing scan
+ * tuple */
Bitmapset *custom_relids; /* RTIs generated by this scan */
const CustomScanMethods *methods;
} CustomScan;
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 8f2c64847e..f10ae4efa8 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -127,9 +127,13 @@ typedef struct Expr
* upper-level plan nodes are reassigned to point to the outputs of their
* subplans; for example, in a join node varno becomes INNER_VAR or OUTER_VAR
* and varattno becomes the index of the proper element of that subplan's
- * target list. But varnoold/varoattno continue to hold the original values.
- * The code doesn't really need varnoold/varoattno, but they are very useful
- * for debugging and interpreting completed plans, so we keep them around.
+ * target list. Similarly, INDEX_VAR is used to identify Vars that reference
+ * an index column rather than a heap column. (In ForeignScan and CustomScan
+ * plan nodes, INDEX_VAR is abused to signify references to columns of a
+ * custom scan tuple type.) In all these cases, varnoold/varoattno hold the
+ * original values. The code doesn't really need varnoold/varoattno, but they
+ * are very useful for debugging and interpreting completed plans, so we keep
+ * them around.
*/
#define INNER_VAR 65000 /* reference to inner subplan */
#define OUTER_VAR 65001 /* reference to outer subplan */
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 1713d298de..d3ee61c4d0 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -365,18 +365,21 @@ typedef struct PlannerInfo
* subplan - plan for subquery (NULL if it's not a subquery)
* subroot - PlannerInfo for subquery (NULL if it's not a subquery)
* subplan_params - list of PlannerParamItems to be passed to subquery
- * fdwroutine - function hooks for FDW, if foreign table (else NULL)
- * fdw_handler - OID of FDW handler, if foreign table (else InvalidOid)
- * fdw_private - private state for FDW, if foreign table (else NULL)
*
* Note: for a subquery, tuples, subplan, subroot are not set immediately
* upon creation of the RelOptInfo object; they are filled in when
- * set_subquery_pathlist processes the object. Likewise, fdwroutine
- * and fdw_private are filled during initial path creation.
+ * set_subquery_pathlist processes the object.
*
* For otherrels that are appendrel members, these fields are filled
* in just as for a baserel.
*
+ * If the relation is either a foreign table or a join of foreign tables that
+ * all belong to the same foreign server, these fields will be set:
+ *
+ * serverid - OID of foreign server, if foreign table (else InvalidOid)
+ * fdwroutine - function hooks for FDW, if foreign table (else NULL)
+ * fdw_private - private state for FDW, if foreign table (else NULL)
+ *
* The presence of the remaining fields depends on the restrictions
* and joins that the relation participates in:
*
@@ -460,10 +463,12 @@ typedef struct RelOptInfo
struct Plan *subplan; /* if subquery */
PlannerInfo *subroot; /* if subquery */
List *subplan_params; /* if subquery */
+
+ /* Information about foreign tables and foreign joins */
+ Oid serverid; /* identifies server for the table or join */
/* use "struct FdwRoutine" to avoid including fdwapi.h here */
- struct FdwRoutine *fdwroutine; /* if foreign table */
- Oid fdw_handler; /* if foreign table */
- void *fdw_private; /* if foreign table */
+ struct FdwRoutine *fdwroutine;
+ void *fdw_private;
/* used by various scans and joins: */
List *baserestrictinfo; /* RestrictInfo structures (if base
@@ -523,7 +528,7 @@ typedef struct IndexOptInfo
bool *reverse_sort; /* is sort order descending? */
bool *nulls_first; /* do NULLs come first in the sort order? */
bool *canreturn; /* which index cols can be returned in an
- index-only scan? */
+ * index-only scan? */
Oid relam; /* OID of the access method (in pg_am) */
RegProcedure amcostestimate; /* OID of the access method's cost fcn */
@@ -1667,6 +1672,28 @@ typedef struct SemiAntiJoinFactors
Selectivity match_count;
} SemiAntiJoinFactors;
+/*
+ * Struct for extra information passed to subroutines of add_paths_to_joinrel
+ *
+ * restrictlist contains all of the RestrictInfo nodes for restriction
+ * clauses that apply to this join
+ * mergeclause_list is a list of RestrictInfo nodes for available
+ * mergejoin clauses in this join
+ * sjinfo is extra info about special joins for selectivity estimation
+ * semifactors is as shown above (only valid for SEMI or ANTI joins)
+ * param_source_rels are OK targets for parameterization of result paths
+ * extra_lateral_rels are additional parameterization for result paths
+ */
+typedef struct JoinPathExtraData
+{
+ List *restrictlist;
+ List *mergeclause_list;
+ SpecialJoinInfo *sjinfo;
+ SemiAntiJoinFactors semifactors;
+ Relids param_source_rels;
+ Relids extra_lateral_rels;
+} JoinPathExtraData;
+
/*
* For speed reasons, cost estimation for join paths is performed in two
* phases: the first phase tries to quickly derive a lower bound for the
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index c42c69d746..3e2378aeb7 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -32,15 +32,11 @@ extern PGDLLIMPORT set_rel_pathlist_hook_type set_rel_pathlist_hook;
/* Hook for plugins to get control in add_paths_to_joinrel() */
typedef void (*set_join_pathlist_hook_type) (PlannerInfo *root,
- RelOptInfo *joinrel,
- RelOptInfo *outerrel,
- RelOptInfo *innerrel,
- List *restrictlist,
- JoinType jointype,
- SpecialJoinInfo *sjinfo,
- SemiAntiJoinFactors *semifactors,
- Relids param_source_rels,
- Relids extra_lateral_rels);
+ RelOptInfo *joinrel,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ JoinPathExtraData *extra);
extern PGDLLIMPORT set_join_pathlist_hook_type set_join_pathlist_hook;
/* Hook for plugins to replace standard_join_search() */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 1d4ab0488e..da15fca1f6 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -41,11 +41,11 @@ extern Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist,
* prototypes for plan/createplan.c
*/
extern Plan *create_plan(PlannerInfo *root, Path *best_path);
-extern Plan *create_plan_recurse(PlannerInfo *root, Path *best_path);
extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
Index scanrelid, Plan *subplan);
extern ForeignScan *make_foreignscan(List *qptlist, List *qpqual,
- Index scanrelid, List *fdw_exprs, List *fdw_private);
+ Index scanrelid, List *fdw_exprs, List *fdw_private,
+ List *fdw_scan_tlist);
extern Append *make_append(List *appendplans, List *tlist);
extern RecursiveUnion *make_recursive_union(List *tlist,
Plan *lefttree, Plan *righttree, int wtParam,
--
cgit v1.2.3
From d02f16470f117db3038dbfd87662d5f0eb5a2a9b Mon Sep 17 00:00:00 2001
From: Peter Eisentraut
Date: Mon, 11 May 2015 20:38:55 -0400
Subject: Replace some appendStringInfo* calls with more appropriate variants
Author: David Rowley
---
contrib/postgres_fdw/postgres_fdw.c | 4 ++--
contrib/test_decoding/test_decoding.c | 2 +-
src/backend/access/rmgrdesc/smgrdesc.c | 2 +-
src/backend/access/rmgrdesc/xlogdesc.c | 4 ++--
src/backend/catalog/namespace.c | 2 +-
src/backend/utils/adt/ruleutils.c | 2 +-
6 files changed, 8 insertions(+), 8 deletions(-)
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index ec89b25f61..0c44260842 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2734,7 +2734,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid)
appendStringInfoString(&buf, ", ");
deparseStringLiteral(&buf, rv->relname);
}
- appendStringInfoString(&buf, ")");
+ appendStringInfoChar(&buf, ')');
}
/* Append ORDER BY at the end of query to ensure output ordering */
@@ -2798,7 +2798,7 @@ postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid)
*/
appendStringInfoString(&buf, " OPTIONS (column_name ");
deparseStringLiteral(&buf, attname);
- appendStringInfoString(&buf, ")");
+ appendStringInfoChar(&buf, ')');
/* Add COLLATE if needed */
if (import_collate && collname != NULL && collnamespace != NULL)
diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c
index bca03ee21b..ae2aca8a8d 100644
--- a/contrib/test_decoding/test_decoding.c
+++ b/contrib/test_decoding/test_decoding.c
@@ -419,7 +419,7 @@ pg_decode_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
get_namespace_name(
get_rel_namespace(RelationGetRelid(relation))),
NameStr(class_form->relname)));
- appendStringInfoString(ctx->out, ":");
+ appendStringInfoChar(ctx->out, ':');
switch (change->action)
{
diff --git a/src/backend/access/rmgrdesc/smgrdesc.c b/src/backend/access/rmgrdesc/smgrdesc.c
index 4ae08b1988..56d02c325c 100644
--- a/src/backend/access/rmgrdesc/smgrdesc.c
+++ b/src/backend/access/rmgrdesc/smgrdesc.c
@@ -29,7 +29,7 @@ smgr_desc(StringInfo buf, XLogReaderState *record)
xl_smgr_create *xlrec = (xl_smgr_create *) rec;
char *path = relpathperm(xlrec->rnode, xlrec->forkNum);
- appendStringInfo(buf, "%s", path);
+ appendStringInfoString(buf, path);
pfree(path);
}
else if (info == XLOG_SMGR_TRUNCATE)
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 2c432f3e2a..4f2913683a 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -75,7 +75,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
{
xl_restore_point *xlrec = (xl_restore_point *) rec;
- appendStringInfo(buf, "%s", xlrec->rp_name);
+ appendStringInfoString(buf, xlrec->rp_name);
}
else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
{
@@ -125,7 +125,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
bool fpw;
memcpy(&fpw, rec, sizeof(bool));
- appendStringInfo(buf, "%s", fpw ? "true" : "false");
+ appendStringInfoString(buf, fpw ? "true" : "false");
}
else if (info == XLOG_END_OF_RECOVERY)
{
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 2f6d697d82..b16af64bb3 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -2937,7 +2937,7 @@ NameListToString(List *names)
if (IsA(name, String))
appendStringInfoString(&string, strVal(name));
else if (IsA(name, A_Star))
- appendStringInfoString(&string, "*");
+ appendStringInfoChar(&string, '*');
else
elog(ERROR, "unexpected node type in name list: %d",
(int) nodeTag(name));
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 156b5331f3..903e80aea3 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -2174,7 +2174,7 @@ print_function_rettype(StringInfo buf, HeapTuple proctup)
appendStringInfoString(&rbuf, "TABLE(");
ntabargs = print_function_arguments(&rbuf, proctup, true, false);
if (ntabargs > 0)
- appendStringInfoString(&rbuf, ")");
+ appendStringInfoChar(&rbuf, ')');
else
resetStringInfo(&rbuf);
}
--
cgit v1.2.3
From 0bb8528b5c738b45d0b65844750588c16bf75c52 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 13 May 2015 14:05:17 -0400
Subject: Fix postgres_fdw to return the right ctid value in EvalPlanQual
cases.
If a postgres_fdw foreign table is a non-locked source relation in an
UPDATE, DELETE, or SELECT FOR UPDATE/SHARE, and the query selects its
ctid column, the wrong value would be returned if an EvalPlanQual
recheck occurred. This happened because the foreign table's result row
was copied via the ROW_MARK_COPY code path, and EvalPlanQualFetchRowMarks
just unconditionally set the reconstructed tuple's t_self to "invalid".
To fix that, we can have EvalPlanQualFetchRowMarks copy the composite
datum's t_ctid field, and be sure to initialize that along with t_self
when postgres_fdw constructs a tuple to return.
If we just did that much then EvalPlanQualFetchRowMarks would start
returning "(0,0)" as ctid for all other ROW_MARK_COPY cases, which perhaps
does not matter much, but then again maybe it might. The cause of that is
that heap_form_tuple, which is the ultimate source of all composite datums,
simply leaves t_ctid as zeroes in newly constructed tuples. That seems
like a bad idea on general principles: a field that's really not been
initialized shouldn't appear to have a valid value. So let's eat the
trivial additional overhead of doing "ItemPointerSetInvalid(&(td->t_ctid))"
in heap_form_tuple.
This closes out our handling of Etsuro Fujita's report that tableoid and
ctid weren't correctly set in postgres_fdw EvalPlanQual cases. Along the
way we did a great deal of work to improve FDWs' ability to control row
locking behavior; which was not wasted effort by any means, but it didn't
end up being a fix for this problem because that feature would be too
expensive for postgres_fdw to use all the time.
Although the fix for the tableoid misbehavior was back-patched, I'm
hesitant to do so here; it seems far less likely that people would care
about remote ctid than tableoid, and even such a minor behavioral change
as this in heap_form_tuple is perhaps best not back-patched. So commit
to HEAD only, at least for the moment.
Etsuro Fujita, with some adjustments by me
---
contrib/postgres_fdw/postgres_fdw.c | 8 +++++++-
src/backend/access/common/heaptuple.c | 2 ++
src/backend/executor/execMain.c | 5 +++--
3 files changed, 12 insertions(+), 3 deletions(-)
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 0c44260842..4f7123b84f 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2964,8 +2964,14 @@ make_tuple_from_result_row(PGresult *res,
tuple = heap_form_tuple(tupdesc, values, nulls);
+ /*
+ * If we have a CTID to return, install it in both t_self and t_ctid.
+ * t_self is the normal place, but if the tuple is converted to a
+ * composite Datum, t_self will be lost; setting t_ctid allows CTID to be
+ * preserved during EvalPlanQual re-evaluations (see ROW_MARK_COPY code).
+ */
if (ctid)
- tuple->t_self = *ctid;
+ tuple->t_self = tuple->t_data->t_ctid = *ctid;
/* Clean up */
MemoryContextReset(temp_context);
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 6cd4e8e11a..f58f81e1ed 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -727,6 +727,8 @@ heap_form_tuple(TupleDesc tupleDescriptor,
HeapTupleHeaderSetDatumLength(td, len);
HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid);
HeapTupleHeaderSetTypMod(td, tupleDescriptor->tdtypmod);
+ /* We also make sure that t_ctid is invalid unless explicitly set */
+ ItemPointerSetInvalid(&(td->t_ctid));
HeapTupleHeaderSetNatts(td, numberOfAttributes);
td->t_hoff = hoff;
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 43d3c44c82..7c29b4b42a 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2613,10 +2613,11 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
/* build a temporary HeapTuple control structure */
tuple.t_len = HeapTupleHeaderGetDatumLength(td);
- ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_data = td;
/* relation might be a foreign table, if so provide tableoid */
tuple.t_tableOid = erm->relid;
- tuple.t_data = td;
+ /* also copy t_ctid in case there's valid data there */
+ tuple.t_self = td->t_ctid;
/* copy and store tuple */
EvalPlanQualSetTuple(epqstate, erm->rti,
--
cgit v1.2.3
From 83e176ec18d2a91dbea1d0d1bd94c38dc47cd77c Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Fri, 15 May 2015 04:02:54 +0200
Subject: Separate block sampling functions
Refactoring ahead of tablesample patch
Requested and reviewed by Michael Paquier
Petr Jelinek
---
contrib/file_fdw/file_fdw.c | 9 +-
contrib/postgres_fdw/postgres_fdw.c | 10 +-
src/backend/commands/analyze.c | 225 +----------------------------------
src/backend/utils/misc/Makefile | 2 +-
src/backend/utils/misc/sampling.c | 226 ++++++++++++++++++++++++++++++++++++
src/include/commands/vacuum.h | 3 -
src/include/utils/sampling.h | 44 +++++++
7 files changed, 287 insertions(+), 232 deletions(-)
create mode 100644 src/backend/utils/misc/sampling.c
create mode 100644 src/include/utils/sampling.h
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 4c56444751..ea4ed40082 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -34,6 +34,7 @@
#include "optimizer/var.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/sampling.h"
PG_MODULE_MAGIC;
@@ -1006,7 +1007,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
{
int numrows = 0;
double rowstoskip = -1; /* -1 means not set yet */
- double rstate;
+ ReservoirStateData rstate;
TupleDesc tupDesc;
Datum *values;
bool *nulls;
@@ -1044,7 +1045,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
ALLOCSET_DEFAULT_MAXSIZE);
/* Prepare for sampling rows */
- rstate = anl_init_selection_state(targrows);
+ reservoir_init_selection_state(&rstate, targrows);
/* Set up callback to identify error line number. */
errcallback.callback = CopyFromErrorCallback;
@@ -1088,7 +1089,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
* not-yet-incremented value of totalrows as t.
*/
if (rowstoskip < 0)
- rowstoskip = anl_get_next_S(*totalrows, targrows, &rstate);
+ rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows);
if (rowstoskip <= 0)
{
@@ -1096,7 +1097,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one old tuple
* at random
*/
- int k = (int) (targrows * anl_random_fract());
+ int k = (int) (targrows * sampler_random_fract());
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 4f7123b84f..43288c27b0 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -37,6 +37,7 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
+#include "utils/sampling.h"
PG_MODULE_MAGIC;
@@ -202,7 +203,7 @@ typedef struct PgFdwAnalyzeState
/* for random sampling */
double samplerows; /* # of rows fetched */
double rowstoskip; /* # of rows to skip before next sample */
- double rstate; /* random state */
+ ReservoirStateData rstate; /* state for reservoir sampling*/
/* working memory contexts */
MemoryContext anl_cxt; /* context for per-analyze lifespan data */
@@ -2411,7 +2412,7 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
astate.numrows = 0;
astate.samplerows = 0;
astate.rowstoskip = -1; /* -1 means not set yet */
- astate.rstate = anl_init_selection_state(targrows);
+ reservoir_init_selection_state(&astate.rstate, targrows);
/* Remember ANALYZE context, and create a per-tuple temp context */
astate.anl_cxt = CurrentMemoryContext;
@@ -2551,13 +2552,12 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
* analyze.c; see Jeff Vitter's paper.
*/
if (astate->rowstoskip < 0)
- astate->rowstoskip = anl_get_next_S(astate->samplerows, targrows,
- &astate->rstate);
+ astate->rowstoskip = reservoir_get_next_S(&astate->rstate, astate->samplerows, targrows);
if (astate->rowstoskip <= 0)
{
/* Choose a random reservoir element to replace. */
- pos = (int) (targrows * anl_random_fract());
+ pos = (int) (targrows * sampler_random_fract());
Assert(pos >= 0 && pos < targrows);
heap_freetuple(astate->rows[pos]);
}
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 15ec0ad551..952cf204a0 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -50,23 +50,13 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
+#include "utils/sampling.h"
#include "utils/sortsupport.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/tqual.h"
-/* Data structure for Algorithm S from Knuth 3.4.2 */
-typedef struct
-{
- BlockNumber N; /* number of blocks, known in advance */
- int n; /* desired sample size */
- BlockNumber t; /* current block number */
- int m; /* blocks selected so far */
-} BlockSamplerData;
-
-typedef BlockSamplerData *BlockSampler;
-
/* Per-index data for ANALYZE */
typedef struct AnlIndexData
{
@@ -89,10 +79,6 @@ static void do_analyze_rel(Relation onerel, int options,
VacuumParams *params, List *va_cols,
AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
bool inh, bool in_outer_xact, int elevel);
-static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
- int samplesize);
-static bool BlockSampler_HasMore(BlockSampler bs);
-static BlockNumber BlockSampler_Next(BlockSampler bs);
static void compute_index_stats(Relation onerel, double totalrows,
AnlIndexData *indexdata, int nindexes,
HeapTuple *rows, int numrows,
@@ -950,94 +936,6 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
return stats;
}
-/*
- * BlockSampler_Init -- prepare for random sampling of blocknumbers
- *
- * BlockSampler is used for stage one of our new two-stage tuple
- * sampling mechanism as discussed on pgsql-hackers 2004-04-02 (subject
- * "Large DB"). It selects a random sample of samplesize blocks out of
- * the nblocks blocks in the table. If the table has less than
- * samplesize blocks, all blocks are selected.
- *
- * Since we know the total number of blocks in advance, we can use the
- * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
- * algorithm.
- */
-static void
-BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize)
-{
- bs->N = nblocks; /* measured table size */
-
- /*
- * If we decide to reduce samplesize for tables that have less or not much
- * more than samplesize blocks, here is the place to do it.
- */
- bs->n = samplesize;
- bs->t = 0; /* blocks scanned so far */
- bs->m = 0; /* blocks selected so far */
-}
-
-static bool
-BlockSampler_HasMore(BlockSampler bs)
-{
- return (bs->t < bs->N) && (bs->m < bs->n);
-}
-
-static BlockNumber
-BlockSampler_Next(BlockSampler bs)
-{
- BlockNumber K = bs->N - bs->t; /* remaining blocks */
- int k = bs->n - bs->m; /* blocks still to sample */
- double p; /* probability to skip block */
- double V; /* random */
-
- Assert(BlockSampler_HasMore(bs)); /* hence K > 0 and k > 0 */
-
- if ((BlockNumber) k >= K)
- {
- /* need all the rest */
- bs->m++;
- return bs->t++;
- }
-
- /*----------
- * It is not obvious that this code matches Knuth's Algorithm S.
- * Knuth says to skip the current block with probability 1 - k/K.
- * If we are to skip, we should advance t (hence decrease K), and
- * repeat the same probabilistic test for the next block. The naive
- * implementation thus requires an anl_random_fract() call for each block
- * number. But we can reduce this to one anl_random_fract() call per
- * selected block, by noting that each time the while-test succeeds,
- * we can reinterpret V as a uniform random number in the range 0 to p.
- * Therefore, instead of choosing a new V, we just adjust p to be
- * the appropriate fraction of its former value, and our next loop
- * makes the appropriate probabilistic test.
- *
- * We have initially K > k > 0. If the loop reduces K to equal k,
- * the next while-test must fail since p will become exactly zero
- * (we assume there will not be roundoff error in the division).
- * (Note: Knuth suggests a "<=" loop condition, but we use "<" just
- * to be doubly sure about roundoff error.) Therefore K cannot become
- * less than k, which means that we cannot fail to select enough blocks.
- *----------
- */
- V = anl_random_fract();
- p = 1.0 - (double) k / (double) K;
- while (V < p)
- {
- /* skip */
- bs->t++;
- K--; /* keep K == N - t */
-
- /* adjust p to be new cutoff point in reduced range */
- p *= 1.0 - (double) k / (double) K;
- }
-
- /* select */
- bs->m++;
- return bs->t++;
-}
-
/*
* acquire_sample_rows -- acquire a random sample of rows from the table
*
@@ -1084,7 +982,7 @@ acquire_sample_rows(Relation onerel, int elevel,
BlockNumber totalblocks;
TransactionId OldestXmin;
BlockSamplerData bs;
- double rstate;
+ ReservoirStateData rstate;
Assert(targrows > 0);
@@ -1094,9 +992,9 @@ acquire_sample_rows(Relation onerel, int elevel,
OldestXmin = GetOldestXmin(onerel, true);
/* Prepare for sampling block numbers */
- BlockSampler_Init(&bs, totalblocks, targrows);
+ BlockSampler_Init(&bs, totalblocks, targrows, random());
/* Prepare for sampling rows */
- rstate = anl_init_selection_state(targrows);
+ reservoir_init_selection_state(&rstate, targrows);
/* Outer loop over blocks to sample */
while (BlockSampler_HasMore(&bs))
@@ -1244,8 +1142,7 @@ acquire_sample_rows(Relation onerel, int elevel,
* t.
*/
if (rowstoskip < 0)
- rowstoskip = anl_get_next_S(samplerows, targrows,
- &rstate);
+ rowstoskip = reservoir_get_next_S(&rstate, samplerows, targrows);
if (rowstoskip <= 0)
{
@@ -1253,7 +1150,7 @@ acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one
* old tuple at random
*/
- int k = (int) (targrows * anl_random_fract());
+ int k = (int) (targrows * sampler_random_fract());
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);
@@ -1312,116 +1209,6 @@ acquire_sample_rows(Relation onerel, int elevel,
return numrows;
}
-/* Select a random value R uniformly distributed in (0 - 1) */
-double
-anl_random_fract(void)
-{
- return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
-}
-
-/*
- * These two routines embody Algorithm Z from "Random sampling with a
- * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
- * (Mar. 1985), Pages 37-57. Vitter describes his algorithm in terms
- * of the count S of records to skip before processing another record.
- * It is computed primarily based on t, the number of records already read.
- * The only extra state needed between calls is W, a random state variable.
- *
- * anl_init_selection_state computes the initial W value.
- *
- * Given that we've already read t records (t >= n), anl_get_next_S
- * determines the number of records to skip before the next record is
- * processed.
- */
-double
-anl_init_selection_state(int n)
-{
- /* Initial value of W (for use when Algorithm Z is first applied) */
- return exp(-log(anl_random_fract()) / n);
-}
-
-double
-anl_get_next_S(double t, int n, double *stateptr)
-{
- double S;
-
- /* The magic constant here is T from Vitter's paper */
- if (t <= (22.0 * n))
- {
- /* Process records using Algorithm X until t is large enough */
- double V,
- quot;
-
- V = anl_random_fract(); /* Generate V */
- S = 0;
- t += 1;
- /* Note: "num" in Vitter's code is always equal to t - n */
- quot = (t - (double) n) / t;
- /* Find min S satisfying (4.1) */
- while (quot > V)
- {
- S += 1;
- t += 1;
- quot *= (t - (double) n) / t;
- }
- }
- else
- {
- /* Now apply Algorithm Z */
- double W = *stateptr;
- double term = t - (double) n + 1;
-
- for (;;)
- {
- double numer,
- numer_lim,
- denom;
- double U,
- X,
- lhs,
- rhs,
- y,
- tmp;
-
- /* Generate U and X */
- U = anl_random_fract();
- X = t * (W - 1.0);
- S = floor(X); /* S is tentatively set to floor(X) */
- /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
- tmp = (t + 1) / term;
- lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);
- rhs = (((t + X) / (term + S)) * term) / t;
- if (lhs <= rhs)
- {
- W = rhs / lhs;
- break;
- }
- /* Test if U <= f(S)/cg(X) */
- y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);
- if ((double) n < S)
- {
- denom = t;
- numer_lim = term + S;
- }
- else
- {
- denom = t - (double) n + S;
- numer_lim = t + 1;
- }
- for (numer = t + S; numer >= numer_lim; numer -= 1)
- {
- y *= numer / denom;
- denom -= 1;
- }
- W = exp(-log(anl_random_fract()) / n); /* Generate W in advance */
- if (exp(log(y) / n) <= (t + X) / t)
- break;
- }
- *stateptr = W;
- }
- return S;
-}
-
/*
* qsort comparator for sorting rows[] array
*/
diff --git a/src/backend/utils/misc/Makefile b/src/backend/utils/misc/Makefile
index 378b77e0bc..788910157d 100644
--- a/src/backend/utils/misc/Makefile
+++ b/src/backend/utils/misc/Makefile
@@ -15,7 +15,7 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
OBJS = guc.o help_config.o pg_rusage.o ps_status.o rls.o \
- superuser.o timeout.o tzparser.o
+ sampling.o superuser.o timeout.o tzparser.o
# This location might depend on the installation directories. Therefore
# we can't subsitute it into pg_config.h.
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
new file mode 100644
index 0000000000..1eeabaf158
--- /dev/null
+++ b/src/backend/utils/misc/sampling.c
@@ -0,0 +1,226 @@
+/*-------------------------------------------------------------------------
+ *
+ * sampling.c
+ * Relation block sampling routines.
+ *
+ * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/misc/sampling.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include
+
+#include "utils/sampling.h"
+
+
+/*
+ * BlockSampler_Init -- prepare for random sampling of blocknumbers
+ *
+ * BlockSampler provides algorithm for block level sampling of a relation
+ * as discussed on pgsql-hackers 2004-04-02 (subject "Large DB")
+ * It selects a random sample of samplesize blocks out of
+ * the nblocks blocks in the table. If the table has less than
+ * samplesize blocks, all blocks are selected.
+ *
+ * Since we know the total number of blocks in advance, we can use the
+ * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
+ * algorithm.
+ */
+void
+BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
+ long randseed)
+{
+ bs->N = nblocks; /* measured table size */
+
+ /*
+ * If we decide to reduce samplesize for tables that have less or not much
+ * more than samplesize blocks, here is the place to do it.
+ */
+ bs->n = samplesize;
+ bs->t = 0; /* blocks scanned so far */
+ bs->m = 0; /* blocks selected so far */
+}
+
+bool
+BlockSampler_HasMore(BlockSampler bs)
+{
+ return (bs->t < bs->N) && (bs->m < bs->n);
+}
+
+BlockNumber
+BlockSampler_Next(BlockSampler bs)
+{
+ BlockNumber K = bs->N - bs->t; /* remaining blocks */
+ int k = bs->n - bs->m; /* blocks still to sample */
+ double p; /* probability to skip block */
+ double V; /* random */
+
+ Assert(BlockSampler_HasMore(bs)); /* hence K > 0 and k > 0 */
+
+ if ((BlockNumber) k >= K)
+ {
+ /* need all the rest */
+ bs->m++;
+ return bs->t++;
+ }
+
+ /*----------
+ * It is not obvious that this code matches Knuth's Algorithm S.
+ * Knuth says to skip the current block with probability 1 - k/K.
+ * If we are to skip, we should advance t (hence decrease K), and
+ * repeat the same probabilistic test for the next block. The naive
+ * implementation thus requires an sampler_random_fract() call for each
+ * block number. But we can reduce this to one sampler_random_fract()
+ * call per selected block, by noting that each time the while-test
+ * succeeds, we can reinterpret V as a uniform random number in the range
+ * 0 to p. Therefore, instead of choosing a new V, we just adjust p to be
+ * the appropriate fraction of its former value, and our next loop
+ * makes the appropriate probabilistic test.
+ *
+ * We have initially K > k > 0. If the loop reduces K to equal k,
+ * the next while-test must fail since p will become exactly zero
+ * (we assume there will not be roundoff error in the division).
+ * (Note: Knuth suggests a "<=" loop condition, but we use "<" just
+ * to be doubly sure about roundoff error.) Therefore K cannot become
+ * less than k, which means that we cannot fail to select enough blocks.
+ *----------
+ */
+ V = sampler_random_fract();
+ p = 1.0 - (double) k / (double) K;
+ while (V < p)
+ {
+ /* skip */
+ bs->t++;
+ K--; /* keep K == N - t */
+
+ /* adjust p to be new cutoff point in reduced range */
+ p *= 1.0 - (double) k / (double) K;
+ }
+
+ /* select */
+ bs->m++;
+ return bs->t++;
+}
+
+/*
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57. Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record.
+ * It is computed primarily based on t, the number of records already read.
+ * The only extra state needed between calls is W, a random state variable.
+ *
+ * reservoir_init_selection_state computes the initial W value.
+ *
+ * Given that we've already read t records (t >= n), reservoir_get_next_S
+ * determines the number of records to skip before the next record is
+ * processed.
+ */
+void
+reservoir_init_selection_state(ReservoirState rs, int n)
+{
+ /* Initial value of W (for use when Algorithm Z is first applied) */
+ *rs = exp(-log(sampler_random_fract()) / n);
+}
+
+double
+reservoir_get_next_S(ReservoirState rs, double t, int n)
+{
+ double S;
+
+ /* The magic constant here is T from Vitter's paper */
+ if (t <= (22.0 * n))
+ {
+ /* Process records using Algorithm X until t is large enough */
+ double V,
+ quot;
+
+ V = sampler_random_fract(); /* Generate V */
+ S = 0;
+ t += 1;
+ /* Note: "num" in Vitter's code is always equal to t - n */
+ quot = (t - (double) n) / t;
+ /* Find min S satisfying (4.1) */
+ while (quot > V)
+ {
+ S += 1;
+ t += 1;
+ quot *= (t - (double) n) / t;
+ }
+ }
+ else
+ {
+ /* Now apply Algorithm Z */
+ double W = *rs;
+ double term = t - (double) n + 1;
+
+ for (;;)
+ {
+ double numer,
+ numer_lim,
+ denom;
+ double U,
+ X,
+ lhs,
+ rhs,
+ y,
+ tmp;
+
+ /* Generate U and X */
+ U = sampler_random_fract();
+ X = t * (W - 1.0);
+ S = floor(X); /* S is tentatively set to floor(X) */
+ /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+ tmp = (t + 1) / term;
+ lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);
+ rhs = (((t + X) / (term + S)) * term) / t;
+ if (lhs <= rhs)
+ {
+ W = rhs / lhs;
+ break;
+ }
+ /* Test if U <= f(S)/cg(X) */
+ y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);
+ if ((double) n < S)
+ {
+ denom = t;
+ numer_lim = term + S;
+ }
+ else
+ {
+ denom = t - (double) n + S;
+ numer_lim = t + 1;
+ }
+ for (numer = t + S; numer >= numer_lim; numer -= 1)
+ {
+ y *= numer / denom;
+ denom -= 1;
+ }
+ W = exp(-log(sampler_random_fract()) / n); /* Generate W in advance */
+ if (exp(log(y) / n) <= (t + X) / t)
+ break;
+ }
+ *rs = W;
+ }
+ return S;
+}
+
+
+/*----------
+ * Random number generator used by sampling
+ *----------
+ */
+
+/* Select a random value R uniformly distributed in (0 - 1) */
+double
+sampler_random_fract()
+{
+ return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+}
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 71f016552b..ce7b28d22c 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -197,8 +197,5 @@ extern void analyze_rel(Oid relid, RangeVar *relation, int options,
VacuumParams *params, List *va_cols, bool in_outer_xact,
BufferAccessStrategy bstrategy);
extern bool std_typanalyze(VacAttrStats *stats);
-extern double anl_random_fract(void);
-extern double anl_init_selection_state(int n);
-extern double anl_get_next_S(double t, int n, double *stateptr);
#endif /* VACUUM_H */
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
new file mode 100644
index 0000000000..e3e7f9cf6a
--- /dev/null
+++ b/src/include/utils/sampling.h
@@ -0,0 +1,44 @@
+/*-------------------------------------------------------------------------
+ *
+ * sampling.h
+ * definitions for sampling functions
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/sampling.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef SAMPLING_H
+#define SAMPLING_H
+
+#include "storage/bufmgr.h"
+
+extern double sampler_random_fract(void);
+
+/* Block sampling methods */
+/* Data structure for Algorithm S from Knuth 3.4.2 */
+typedef struct
+{
+ BlockNumber N; /* number of blocks, known in advance */
+ int n; /* desired sample size */
+ BlockNumber t; /* current block number */
+ int m; /* blocks selected so far */
+} BlockSamplerData;
+
+typedef BlockSamplerData *BlockSampler;
+
+extern void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
+ int samplesize, long randseed);
+extern bool BlockSampler_HasMore(BlockSampler bs);
+extern BlockNumber BlockSampler_Next(BlockSampler bs);
+
+/* Reservoid sampling methods */
+typedef double ReservoirStateData;
+typedef ReservoirStateData *ReservoirState;
+
+extern void reservoir_init_selection_state(ReservoirState rs, int n);
+extern double reservoir_get_next_S(ReservoirState rs, double t, int n);
+
+#endif /* SAMPLING_H */
--
cgit v1.2.3
From f6d208d6e51810c73f0e02c477984a6b44627f11 Mon Sep 17 00:00:00 2001
From: Simon Riggs
Date: Fri, 15 May 2015 14:37:10 -0400
Subject: TABLESAMPLE, SQL Standard and extensible
Add a TABLESAMPLE clause to SELECT statements that allows
user to specify random BERNOULLI sampling or block level
SYSTEM sampling. Implementation allows for extensible
sampling functions to be written, using a standard API.
Basic version follows SQLStandard exactly. Usable
concrete use cases for the sampling API follow in later
commits.
Petr Jelinek
Reviewed by Michael Paquier and Simon Riggs
---
contrib/file_fdw/file_fdw.c | 2 +-
contrib/postgres_fdw/postgres_fdw.c | 2 +-
doc/src/sgml/catalogs.sgml | 120 +++++++++
doc/src/sgml/ref/select.sgml | 61 ++++-
src/backend/access/Makefile | 3 +-
src/backend/access/heap/heapam.c | 41 ++-
src/backend/access/tablesample/Makefile | 17 ++
src/backend/access/tablesample/bernoulli.c | 235 +++++++++++++++++
src/backend/access/tablesample/system.c | 186 ++++++++++++++
src/backend/access/tablesample/tablesample.c | 368 +++++++++++++++++++++++++++
src/backend/catalog/Makefile | 5 +-
src/backend/commands/analyze.c | 2 +-
src/backend/commands/explain.c | 21 ++
src/backend/executor/Makefile | 2 +-
src/backend/executor/execAmi.c | 8 +
src/backend/executor/execCurrent.c | 1 +
src/backend/executor/execProcnode.c | 14 +
src/backend/executor/nodeSamplescan.c | 256 +++++++++++++++++++
src/backend/nodes/copyfuncs.c | 60 +++++
src/backend/nodes/equalfuncs.c | 37 +++
src/backend/nodes/nodeFuncs.c | 28 ++
src/backend/nodes/outfuncs.c | 48 ++++
src/backend/nodes/readfuncs.c | 45 ++++
src/backend/optimizer/path/allpaths.c | 49 ++++
src/backend/optimizer/path/costsize.c | 67 +++++
src/backend/optimizer/plan/createplan.c | 69 +++++
src/backend/optimizer/plan/planner.c | 15 +-
src/backend/optimizer/plan/setrefs.c | 11 +
src/backend/optimizer/plan/subselect.c | 1 +
src/backend/optimizer/util/pathnode.c | 22 ++
src/backend/parser/gram.y | 36 ++-
src/backend/parser/parse_clause.c | 56 ++++
src/backend/parser/parse_func.c | 143 +++++++++++
src/backend/rewrite/rewriteHandler.c | 3 +
src/backend/utils/adt/ruleutils.c | 50 ++++
src/backend/utils/cache/lsyscache.c | 27 ++
src/backend/utils/cache/syscache.c | 23 ++
src/backend/utils/misc/sampling.c | 33 ++-
src/include/access/heapam.h | 4 +
src/include/access/relscan.h | 1 +
src/include/access/tablesample.h | 60 +++++
src/include/catalog/indexing.h | 5 +
src/include/catalog/pg_proc.h | 27 ++
src/include/catalog/pg_tablesample_method.h | 78 ++++++
src/include/executor/nodeSamplescan.h | 24 ++
src/include/nodes/execnodes.h | 9 +
src/include/nodes/nodes.h | 4 +
src/include/nodes/parsenodes.h | 37 +++
src/include/nodes/plannodes.h | 6 +
src/include/optimizer/cost.h | 1 +
src/include/optimizer/pathnode.h | 2 +
src/include/parser/kwlist.h | 1 +
src/include/parser/parse_func.h | 5 +
src/include/port.h | 4 +
src/include/utils/lsyscache.h | 1 +
src/include/utils/rel.h | 1 -
src/include/utils/sampling.h | 15 +-
src/include/utils/syscache.h | 2 +
src/port/erand48.c | 3 -
src/test/regress/expected/rowsecurity.out | 26 ++
src/test/regress/expected/sanity_check.out | 1 +
src/test/regress/expected/tablesample.out | 231 +++++++++++++++++
src/test/regress/parallel_schedule | 2 +-
src/test/regress/serial_schedule | 1 +
src/test/regress/sql/rowsecurity.sql | 4 +
src/test/regress/sql/tablesample.sql | 74 ++++++
66 files changed, 2756 insertions(+), 40 deletions(-)
create mode 100644 src/backend/access/tablesample/Makefile
create mode 100644 src/backend/access/tablesample/bernoulli.c
create mode 100644 src/backend/access/tablesample/system.c
create mode 100644 src/backend/access/tablesample/tablesample.c
create mode 100644 src/backend/executor/nodeSamplescan.c
create mode 100644 src/include/access/tablesample.h
create mode 100644 src/include/catalog/pg_tablesample_method.h
create mode 100644 src/include/executor/nodeSamplescan.h
create mode 100644 src/test/regress/expected/tablesample.out
create mode 100644 src/test/regress/sql/tablesample.sql
(limited to 'contrib/postgres_fdw/postgres_fdw.c')
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index ea4ed40082..499f24ff28 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1097,7 +1097,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one old tuple
* at random
*/
- int k = (int) (targrows * sampler_random_fract());
+ int k = (int) (targrows * sampler_random_fract(rstate.randstate));
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 43288c27b0..d420cb2d0c 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -2557,7 +2557,7 @@ analyze_row_processor(PGresult *res, int row, PgFdwAnalyzeState *astate)
if (astate->rowstoskip <= 0)
{
/* Choose a random reservoir element to replace. */
- pos = (int) (targrows * sampler_random_fract());
+ pos = (int) (targrows * sampler_random_fract(astate->rstate.randstate));
Assert(pos >= 0 && pos < targrows);
heap_freetuple(astate->rows[pos]);
}
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 6c51974961..5b36487609 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -278,6 +278,11 @@
planner statistics
+
+ pg_tablesample_method
+ table sampling methods
+
+
pg_tablespacetablespaces within this database cluster
@@ -6127,6 +6132,121 @@
+
+ pg_tabesample_method
+
+
+ pg_am
+
+
+
+ The catalog pg_tablesample_method stores
+ information about table sampling methods which can be used in
+ TABLESAMPLE clause of a SELECT
+ statement.
+
+
+
+ pg_tablesample_method> Columns
+
+
+
+
+ Name
+ Type
+ References
+ Description
+
+
+
+
+
+ oid
+ oid
+
+ Row identifier (hidden attribute; must be explicitly selected)
+
+
+
+ tsmname
+ name
+
+ Name of the sampling method
+
+
+
+ tsmseqscan
+ bool
+
+ If true, the sampling method scans the whole table sequentially.
+
+
+
+
+ tsmpagemode
+ bool
+
+ If true, the sampling method always reads the pages completely.
+
+
+
+
+ tsminit
+ regproc
+ pg_proc.oid
+ Initialize the sampling scan function
+
+
+
+ tsmnextblock
+ regproc
+ pg_proc.oid
+ Get next block number function
+
+
+
+ tsmnexttuple
+ regproc
+ pg_proc.oid
+ Get next tuple offset function
+
+
+
+ tsmexaminetuple
+ regproc
+ pg_proc.oid
+ Function which examines the tuple contents and decides if to
+ return it, or zero if none
+
+
+
+ tsmend
+ regproc
+ pg_proc.oid
+ End the sampling scan function
+
+
+
+ tsmreset
+ regproc
+ pg_proc.oid
+ Restart the state of sampling scan function
+
+
+
+ tsmcost
+ regproc
+ pg_proc.oid
+ Costing function
+
+
+
+
+
+
+
+
+
pg_tablespace
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 2295f63c13..42e04660a1 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -49,7 +49,7 @@ SELECT [ ALL | DISTINCT [ ON ( expressionwhere from_item can be one of:
- [ ONLY ] table_name [ * ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ]
+ [ ONLY ] table_name [ * ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] [ TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ] ]
[ LATERAL ] ( select ) [ AS ] alias [ ( column_alias [, ...] ) ]
with_query_name [ [ AS ] alias [ ( column_alias [, ...] ) ] ]
[ LATERAL ] function_name ( [ argument [, ...] ] )
@@ -316,6 +316,50 @@ TABLE [ ONLY ] table_name [ * ]
+
+ TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ]
+
+
+ Table sample clause after
+ table_name indicates that
+ a sampling_method should
+ be used to retrieve subset of rows in the table.
+ The sampling_method can be
+ any sampling method installed in the database. There are currently two
+ sampling methods available in the standard
+ PostgreSQL distribution:
+
+
+ SYSTEM
+
+
+ BERNOULLI
+
+
+ Both of these sampling methods currently accept only single argument
+ which is the percent (floating point from 0 to 100) of the rows to
+ be returned.
+ The SYSTEM sampling method does block level
+ sampling with each block having the same chance of being selected and
+ returns all rows from each selected block.
+ The BERNOULLI scans whole table and returns
+ individual rows with equal probability. Additional sampling methods
+ may be installed in the database via extensions.
+
+
+ The optional parameter REPEATABLE uses the seed
+ parameter, which can be a number or expression producing a number, as
+ a random seed for sampling. Note that subsequent commands may return
+ different results even if same REPEATABLE clause was
+ specified. This happens because DML statements and
+ maintenance operations such as VACUUM> may affect physical
+ distribution of data. The setseed()> function will not
+ affect the sampling result when the REPEATABLE
+ parameter is used.
+
+
+
+
alias
@@ -1927,5 +1971,20 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
ROWS FROM( ... )> is an extension of the SQL standard.
+
+
+ TABLESAMPLE clause
+
+
+ The TABLESAMPLE> clause is currently accepted only on physical
+ relations and materialized views.
+
+
+
+ Additional modules allow you to install custom sampling methods and use
+ them instead of the SQL standard methods.
+
+
+
diff --git a/src/backend/access/Makefile b/src/backend/access/Makefile
index 21721b48f0..bd93a6a8d1 100644
--- a/src/backend/access/Makefile
+++ b/src/backend/access/Makefile
@@ -8,6 +8,7 @@ subdir = src/backend/access
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist transam
+SUBDIRS = brin common gin gist hash heap index nbtree rmgrdesc spgist \
+ tablesample transam
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 1a8d2f2d0b..f0c2394e60 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -80,8 +80,9 @@ bool synchronize_seqscans = true;
static HeapScanDesc heap_beginscan_internal(Relation relation,
Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync,
- bool is_bitmapscan, bool temp_snap);
+ bool allow_strat, bool allow_sync, bool allow_pagemode,
+ bool is_bitmapscan, bool is_samplescan,
+ bool temp_snap);
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
@@ -294,9 +295,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool is_rescan)
/*
* Currently, we don't have a stats counter for bitmap heap scans (but the
- * underlying bitmap index scans will be counted).
+ * underlying bitmap index scans will be counted) or sample scans (we only
+ * update stats for tuple fetches there)
*/
- if (!scan->rs_bitmapscan)
+ if (!scan->rs_bitmapscan && !scan->rs_samplescan)
pgstat_count_heap_scan(scan->rs_rd);
}
@@ -315,7 +317,7 @@ heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber numBlks)
* In page-at-a-time mode it performs additional work, namely determining
* which tuples on the page are visible.
*/
-static void
+void
heapgetpage(HeapScanDesc scan, BlockNumber page)
{
Buffer buffer;
@@ -1310,6 +1312,9 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode,
* HeapScanDesc for a bitmap heap scan. Although that scan technology is
* really quite unlike a standard seqscan, there is just enough commonality
* to make it worth using the same data structure.
+ *
+ * heap_beginscan_samplingscan is alternate entry point for setting up a
+ * HeapScanDesc for a TABLESAMPLE scan.
* ----------------
*/
HeapScanDesc
@@ -1317,7 +1322,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- true, true, false, false);
+ true, true, true, false, false, false);
}
HeapScanDesc
@@ -1327,7 +1332,7 @@ heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key)
Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- true, true, false, true);
+ true, true, true, false, false, true);
}
HeapScanDesc
@@ -1336,7 +1341,8 @@ heap_beginscan_strat(Relation relation, Snapshot snapshot,
bool allow_strat, bool allow_sync)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- allow_strat, allow_sync, false, false);
+ allow_strat, allow_sync, true,
+ false, false, false);
}
HeapScanDesc
@@ -1344,14 +1350,24 @@ heap_beginscan_bm(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key)
{
return heap_beginscan_internal(relation, snapshot, nkeys, key,
- false, false, true, false);
+ false, false, true, true, false, false);
+}
+
+HeapScanDesc
+heap_beginscan_sampling(Relation relation, Snapshot snapshot,
+ int nkeys, ScanKey key,
+ bool allow_strat, bool allow_pagemode)
+{
+ return heap_beginscan_internal(relation, snapshot, nkeys, key,
+ allow_strat, false, allow_pagemode,
+ false, true, false);
}
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key,
- bool allow_strat, bool allow_sync,
- bool is_bitmapscan, bool temp_snap)
+ bool allow_strat, bool allow_sync, bool allow_pagemode,
+ bool is_bitmapscan, bool is_samplescan, bool temp_snap)
{
HeapScanDesc scan;
@@ -1373,6 +1389,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
scan->rs_snapshot = snapshot;
scan->rs_nkeys = nkeys;
scan->rs_bitmapscan = is_bitmapscan;
+ scan->rs_samplescan = is_samplescan;
scan->rs_strategy = NULL; /* set in initscan */
scan->rs_allow_strat = allow_strat;
scan->rs_allow_sync = allow_sync;
@@ -1381,7 +1398,7 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
/*
* we can use page-at-a-time mode if it's an MVCC-safe snapshot
*/
- scan->rs_pageatatime = IsMVCCSnapshot(snapshot);
+ scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
/*
* For a seqscan in a serializable transaction, acquire a predicate lock
diff --git a/src/backend/access/tablesample/Makefile b/src/backend/access/tablesample/Makefile
new file mode 100644
index 0000000000..46eeb59f9c
--- /dev/null
+++ b/src/backend/access/tablesample/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for utils/tablesample
+#
+# IDENTIFICATION
+# src/backend/utils/tablesample/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/access/tablesample
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = tablesample.o system.o bernoulli.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/tablesample/bernoulli.c b/src/backend/access/tablesample/bernoulli.c
new file mode 100644
index 0000000000..c91f3f593e
--- /dev/null
+++ b/src/backend/access/tablesample/bernoulli.c
@@ -0,0 +1,235 @@
+/*-------------------------------------------------------------------------
+ *
+ * bernoulli.c
+ * interface routines for BERNOULLI tablesample method
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/bernoulli.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/* tsdesc */
+typedef struct
+{
+ uint32 seed; /* random seed */
+ BlockNumber startblock; /* starting block, we use ths for syncscan support */
+ BlockNumber nblocks; /* number of blocks */
+ BlockNumber blockno; /* current block */
+ float4 probability; /* probabilty that tuple will be returned (0.0-1.0) */
+ OffsetNumber lt; /* last tuple returned from current block */
+ SamplerRandomState randstate; /* random generator tsdesc */
+} BernoulliSamplerData;
+
+/*
+ * Initialize the state.
+ */
+Datum
+tsm_bernoulli_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ BernoulliSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(BernoulliSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->startblock = scan->rs_startblock;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->blockno = InvalidBlockNumber;
+ sampler->probability = percent / 100;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number to read or InvalidBlockNumber if we are at the
+ * end of the relation.
+ */
+Datum
+tsm_bernoulli_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ /*
+ * Bernoulli sampling scans all blocks on the table and supports
+ * syncscan so loop from startblock to startblock instead of
+ * from 0 to nblocks.
+ */
+ if (sampler->blockno == InvalidBlockNumber)
+ sampler->blockno = sampler->startblock;
+ else
+ {
+ sampler->blockno++;
+
+ if (sampler->blockno >= sampler->nblocks)
+ sampler->blockno = 0;
+
+ if (sampler->blockno == sampler->startblock)
+ PG_RETURN_UINT32(InvalidBlockNumber);
+ }
+
+ PG_RETURN_UINT32(sampler->blockno);
+}
+
+/*
+ * Get next tuple from current block.
+ *
+ * This method implements the main logic in bernoulli sampling.
+ * The algorithm simply generates new random number (in 0.0-1.0 range) and if
+ * it falls within user specified probability (in the same range) return the
+ * tuple offset.
+ *
+ * It is ok here to return tuple offset without knowing if tuple is visible
+ * and not check it via examinetuple. The reason for that is that we do the
+ * coinflip (random number generation) for every tuple in the table. Since all
+ * tuples have same probability of being returned the visible and invisible
+ * tuples will be returned in same ratio as they have in the actual table.
+ * This means that there is no skew towards either visible or invisible tuples
+ * and the number returned visible tuples to from the executor node is the
+ * fraction of visible tuples which was specified in input.
+ *
+ * This is faster than doing the coinflip in the examinetuple because we don't
+ * have to do visibility checks on uninteresting tuples.
+ *
+ * If we reach end of the block return InvalidOffsetNumber which tells
+ * SampleScan to go to next block.
+ */
+Datum
+tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+ float4 probability = sampler->probability;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ /*
+ * Loop over tuple offsets until the random generator returns value that
+ * is within the probability of returning the tuple or until we reach
+ * end of the block.
+ *
+ * (This is our implementation of bernoulli trial)
+ */
+ while (sampler_random_fract(sampler->randstate) > probability)
+ {
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ break;
+ }
+
+ if (tupoffset > maxoffset)
+ /* Tell SampleScan that we want next block. */
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_bernoulli_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset tsdesc (called by ReScan).
+ */
+Datum
+tsm_bernoulli_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ BernoulliSamplerData *sampler =
+ (BernoulliSamplerData *) tsdesc->tsmdata;
+
+ sampler->blockno = InvalidBlockNumber;
+ sampler->lt = InvalidOffsetNumber;
+ sampler_random_init_state(sampler->seed, sampler->randstate);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_bernoulli_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ *pages = baserel->pages;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/system.c b/src/backend/access/tablesample/system.c
new file mode 100644
index 0000000000..1412e511fa
--- /dev/null
+++ b/src/backend/access/tablesample/system.c
@@ -0,0 +1,186 @@
+/*-------------------------------------------------------------------------
+ *
+ * system.c
+ * interface routines for system tablesample method
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/tablesample/system.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+
+#include "access/tablesample.h"
+#include "access/relscan.h"
+#include "nodes/execnodes.h"
+#include "nodes/relation.h"
+#include "optimizer/clauses.h"
+#include "storage/bufmgr.h"
+#include "utils/sampling.h"
+
+
+/*
+ * State
+ */
+typedef struct
+{
+ BlockSamplerData bs;
+ uint32 seed; /* random seed */
+ BlockNumber nblocks; /* number of block in relation */
+ int samplesize; /* number of blocks to return */
+ OffsetNumber lt; /* last tuple returned from current block */
+} SystemSamplerData;
+
+
+/*
+ * Initializes the state.
+ */
+Datum
+tsm_system_init(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ uint32 seed = PG_GETARG_UINT32(1);
+ float4 percent = PG_ARGISNULL(2) ? -1 : PG_GETARG_FLOAT4(2);
+ HeapScanDesc scan = tsdesc->heapScan;
+ SystemSamplerData *sampler;
+
+ if (percent < 0 || percent > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid sample size"),
+ errhint("Sample size must be numeric value between 0 and 100 (inclusive).")));
+
+ sampler = palloc0(sizeof(SystemSamplerData));
+
+ /* Remember initial values for reinit */
+ sampler->seed = seed;
+ sampler->nblocks = scan->rs_nblocks;
+ sampler->samplesize = 1 + (int) (sampler->nblocks * (percent / 100.0));
+ sampler->lt = InvalidOffsetNumber;
+
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ tsdesc->tsmdata = (void *) sampler;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Get next block number or InvalidBlockNumber when we're done.
+ *
+ * Uses the same logic as ANALYZE for picking the random blocks.
+ */
+Datum
+tsm_system_nextblock(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ BlockNumber blockno;
+
+ if (!BlockSampler_HasMore(&sampler->bs))
+ PG_RETURN_UINT32(InvalidBlockNumber);
+
+ blockno = BlockSampler_Next(&sampler->bs);
+
+ PG_RETURN_UINT32(blockno);
+}
+
+/*
+ * Get next tuple offset in current block or InvalidOffsetNumber if we are done
+ * with this block.
+ */
+Datum
+tsm_system_nexttuple(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ OffsetNumber maxoffset = PG_GETARG_UINT16(2);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+ OffsetNumber tupoffset = sampler->lt;
+
+ if (tupoffset == InvalidOffsetNumber)
+ tupoffset = FirstOffsetNumber;
+ else
+ tupoffset++;
+
+ if (tupoffset > maxoffset)
+ tupoffset = InvalidOffsetNumber;
+
+ sampler->lt = tupoffset;
+
+ PG_RETURN_UINT16(tupoffset);
+}
+
+/*
+ * Cleanup method.
+ */
+Datum
+tsm_system_end(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+
+ pfree(tsdesc->tsmdata);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Reset state (called by ReScan).
+ */
+Datum
+tsm_system_reset(PG_FUNCTION_ARGS)
+{
+ TableSampleDesc *tsdesc = (TableSampleDesc *) PG_GETARG_POINTER(0);
+ SystemSamplerData *sampler = (SystemSamplerData *) tsdesc->tsmdata;
+
+ sampler->lt = InvalidOffsetNumber;
+ BlockSampler_Init(&sampler->bs, sampler->nblocks, sampler->samplesize,
+ sampler->seed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Costing function.
+ */
+Datum
+tsm_system_cost(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Path *path = (Path *) PG_GETARG_POINTER(1);
+ RelOptInfo *baserel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ List *args = (List *) PG_GETARG_POINTER(3);
+ BlockNumber *pages = (BlockNumber *) PG_GETARG_POINTER(4);
+ double *tuples = (double *) PG_GETARG_POINTER(5);
+ Node *pctnode;
+ float4 samplesize;
+
+ pctnode = linitial(args);
+ pctnode = estimate_expression_value(root, pctnode);
+
+ if (IsA(pctnode, RelabelType))
+ pctnode = (Node *) ((RelabelType *) pctnode)->arg;
+
+ if (IsA(pctnode, Const))
+ {
+ samplesize = DatumGetFloat4(((Const *) pctnode)->constvalue);
+ samplesize /= 100.0;
+ }
+ else
+ {
+ /* Default samplesize if the estimation didn't return Const. */
+ samplesize = 0.1f;
+ }
+
+ *pages = baserel->pages * samplesize;
+ *tuples = path->rows * samplesize;
+ path->rows = *tuples;
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/access/tablesample/tablesample.c b/src/backend/access/tablesample/tablesample.c
new file mode 100644
index 0000000000..ef55d062e7
--- /dev/null
+++ b/src/backend/access/tablesample/tablesample.c
@@ -0,0 +1,368 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.c
+ * TABLESAMPLE internal API
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/access/tablesample/tablesample.c
+ *
+ * TABLESAMPLE is the SQL standard clause for sampling the relations.
+ *
+ * The API is interface between the Executor and the TABLESAMPLE Methods.
+ *
+ * TABLESAMPLE Methods are implementations of actual sampling algorithms which
+ * can be used for returning a sample of the source relation.
+ * Methods don't read the table directly but are asked for block number and
+ * tuple offset which they want to examine (or return) and the tablesample
+ * interface implemented here does the reading for them.
+ *
+ * We currently only support sampling of the physical relations, but in the
+ * future we might extend the API to support subqueries as well.
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/tablesample.h"
+
+#include "catalog/pg_tablesample_method.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/tqual.h"
+
+
+static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan);
+
+
+/*
+ * Initialize the TABLESAMPLE Descriptor and the TABLESAMPLE Method.
+ */
+TableSampleDesc *
+tablesample_init(SampleScanState *scanstate, TableSampleClause *tablesample)
+{
+ FunctionCallInfoData fcinfo;
+ int i;
+ List *args = tablesample->args;
+ ListCell *arg;
+ ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
+ TableSampleDesc *tsdesc = (TableSampleDesc *) palloc0(sizeof(TableSampleDesc));
+
+ /* Load functions */
+ fmgr_info(tablesample->tsminit, &(tsdesc->tsminit));
+ fmgr_info(tablesample->tsmnextblock, &(tsdesc->tsmnextblock));
+ fmgr_info(tablesample->tsmnexttuple, &(tsdesc->tsmnexttuple));
+ if (OidIsValid(tablesample->tsmexaminetuple))
+ fmgr_info(tablesample->tsmexaminetuple, &(tsdesc->tsmexaminetuple));
+ else
+ tsdesc->tsmexaminetuple.fn_oid = InvalidOid;
+ fmgr_info(tablesample->tsmreset, &(tsdesc->tsmreset));
+ fmgr_info(tablesample->tsmend, &(tsdesc->tsmend));
+
+ InitFunctionCallInfoData(fcinfo, &tsdesc->tsminit,
+ list_length(args) + 2,
+ InvalidOid, NULL, NULL);
+
+ tsdesc->tupDesc = scanstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
+ tsdesc->heapScan = scanstate->ss.ss_currentScanDesc;
+
+ /* First argument for init function is always TableSampleDesc */
+ fcinfo.arg[0] = PointerGetDatum(tsdesc);
+ fcinfo.argnull[0] = false;
+
+ /*
+ * Second arg for init function is always REPEATABLE
+ * When tablesample->repeatable is NULL then REPEATABLE clause was not
+ * specified.
+ * When specified, the expression cannot evaluate to NULL.
+ */
+ if (tablesample->repeatable)
+ {
+ ExprState *argstate = ExecInitExpr((Expr *) tablesample->repeatable,
+ (PlanState *) scanstate);
+ fcinfo.arg[1] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[1], NULL);
+ if (fcinfo.argnull[1])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value")));
+ }
+ else
+ {
+ fcinfo.arg[1] = UInt32GetDatum(random());
+ fcinfo.argnull[1] = false;
+ }
+
+ /* Rest of the arguments come from user. */
+ i = 2;
+ foreach(arg, args)
+ {
+ Expr *argexpr = (Expr *) lfirst(arg);
+ ExprState *argstate = ExecInitExpr(argexpr, (PlanState *) scanstate);
+
+ if (argstate == NULL)
+ {
+ fcinfo.argnull[i] = true;
+ fcinfo.arg[i] = (Datum) 0;;
+ }
+
+ fcinfo.arg[i] = ExecEvalExpr(argstate, econtext,
+ &fcinfo.argnull[i], NULL);
+ i++;
+ }
+ Assert(i == fcinfo.nargs);
+
+ (void) FunctionCallInvoke(&fcinfo);
+
+ return tsdesc;
+}
+
+/*
+ * Get next tuple from TABLESAMPLE Method.
+ */
+HeapTuple
+tablesample_getnext(TableSampleDesc *desc)
+{
+ HeapScanDesc scan = desc->heapScan;
+ HeapTuple tuple = &(scan->rs_ctup);
+ bool pagemode = scan->rs_pageatatime;
+ BlockNumber blockno;
+ Page page;
+ bool page_all_visible;
+ ItemId itemid;
+ OffsetNumber tupoffset,
+ maxoffset;
+
+ if (!scan->rs_inited)
+ {
+ /*
+ * return null immediately if relation is empty
+ */
+ if (scan->rs_nblocks == 0)
+ {
+ Assert(!BufferIsValid(scan->rs_cbuf));
+ tuple->t_data = NULL;
+ return NULL;
+ }
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+ if (!BlockNumberIsValid(blockno))
+ {
+ tuple->t_data = NULL;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+ scan->rs_inited = true;
+ }
+ else
+ {
+ /* continue from previously returned page/tuple */
+ blockno = scan->rs_cblock; /* current page */
+ }
+
+ /*
+ * When pagemode is disabled, the scan will do visibility checks for each
+ * tuple it finds so the buffer needs to be locked.
+ */
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+
+ for (;;)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ tupoffset = DatumGetUInt16(FunctionCall3(&desc->tsmnexttuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ UInt16GetDatum(maxoffset)));
+
+ if (OffsetNumberIsValid(tupoffset))
+ {
+ bool visible;
+ bool found;
+
+ /* Skip invalid tuple pointers. */
+ itemid = PageGetItemId(page, tupoffset);
+ if (!ItemIdIsNormal(itemid))
+ continue;
+
+ tuple->t_data = (HeapTupleHeader) PageGetItem((Page) page, itemid);
+ tuple->t_len = ItemIdGetLength(itemid);
+ ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+ if (page_all_visible)
+ visible = true;
+ else
+ visible = SampleTupleVisible(tuple, tupoffset, scan);
+
+ /*
+ * Let the sampling method examine the actual tuple and decide if we
+ * should return it.
+ *
+ * Note that we let it examine even invisible tuples for
+ * statistical purposes, but not return them since user should
+ * never see invisible tuples.
+ */
+ if (OidIsValid(desc->tsmexaminetuple.fn_oid))
+ {
+ found = DatumGetBool(FunctionCall4(&desc->tsmexaminetuple,
+ PointerGetDatum(desc),
+ UInt32GetDatum(blockno),
+ PointerGetDatum(tuple),
+ BoolGetDatum(visible)));
+ /* Should not happen if sampling method is well written. */
+ if (found && !visible)
+ elog(ERROR, "Sampling method wanted to return invisible tuple");
+ }
+ else
+ found = visible;
+
+ /* Found visible tuple, return it. */
+ if (found)
+ {
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ break;
+ }
+ else
+ {
+ /* Try next tuple from same page. */
+ continue;
+ }
+ }
+
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ blockno = DatumGetInt32(FunctionCall1(&desc->tsmnextblock,
+ PointerGetDatum(desc)));
+
+ /*
+ * Report our new scan position for synchronization purposes. We
+ * don't do that when moving backwards, however. That would just
+ * mess up any other forward-moving scanners.
+ *
+ * Note: we do this before checking for end of scan so that the
+ * final state of the position hint is back at the start of the
+ * rel. That's not strictly necessary, but otherwise when you run
+ * the same query multiple times the starting position would shift
+ * a little bit backwards on every invocation, which is confusing.
+ * We don't guarantee any specific ordering in general, though.
+ */
+ if (scan->rs_syncscan)
+ ss_report_location(scan->rs_rd, BlockNumberIsValid(blockno) ?
+ blockno : scan->rs_startblock);
+
+ /*
+ * Reached end of scan.
+ */
+ if (!BlockNumberIsValid(blockno))
+ {
+ if (BufferIsValid(scan->rs_cbuf))
+ ReleaseBuffer(scan->rs_cbuf);
+ scan->rs_cbuf = InvalidBuffer;
+ scan->rs_cblock = InvalidBlockNumber;
+ tuple->t_data = NULL;
+ scan->rs_inited = false;
+ return NULL;
+ }
+
+ heapgetpage(scan, blockno);
+
+ if (!pagemode)
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+ page = (Page) BufferGetPage(scan->rs_cbuf);
+ page_all_visible = PageIsAllVisible(page);
+ maxoffset = PageGetMaxOffsetNumber(page);
+ }
+
+ pgstat_count_heap_getnext(scan->rs_rd);
+
+ return &(scan->rs_ctup);
+}
+
+/*
+ * Reset the sampling to starting state
+ */
+void
+tablesample_reset(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmreset, PointerGetDatum(desc));
+}
+
+/*
+ * Signal the sampling method that the scan has finished.
+ */
+void
+tablesample_end(TableSampleDesc *desc)
+{
+ (void) FunctionCall1(&desc->tsmend, PointerGetDatum(desc));
+}
+
+/*
+ * Check visibility of the tuple.
+ */
+static bool
+SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
+{
+ /*
+ * If this scan is reading whole pages at a time, there is already
+ * visibility info present in rs_vistuples so we can just search it
+ * for the tupoffset.
+ */
+ if (scan->rs_pageatatime)
+ {
+ int start = 0,
+ end = scan->rs_ntuples - 1;
+
+ /*
+ * Do the binary search over rs_vistuples, it's already sorted by
+ * OffsetNumber so we don't need to do any sorting ourselves here.
+ *
+ * We could use bsearch() here but it's slower for integers because
+ * of the function call overhead and because it needs boiler plate code
+ * it would not save us anything code-wise anyway.
+ */
+ while (start <= end)
+ {
+ int mid = start + (end - start) / 2;
+ OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+ if (curoffset == tupoffset)
+ return true;
+ else if (curoffset > tupoffset)
+ end = mid - 1;
+ else
+ start = mid + 1;
+ }
+
+ return false;
+ }
+ else
+ {
+ /* No pagemode, we have to check the tuple itself. */
+ Snapshot snapshot = scan->rs_snapshot;
+ Buffer buffer = scan->rs_cbuf;
+
+ bool visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
+
+ CheckForSerializableConflictOut(visible, scan->rs_rd, tuple, buffer,
+ snapshot);
+
+ return visible;
+ }
+}
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 37d05d1acc..3d1139b5ba 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -40,9 +40,8 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_ts_parser.h pg_ts_template.h pg_extension.h \
pg_foreign_data_wrapper.h pg_foreign_server.h pg_user_mapping.h \
pg_foreign_table.h pg_policy.h pg_replication_origin.h \
- pg_default_acl.h pg_seclabel.h pg_shseclabel.h pg_collation.h pg_range.h \
- pg_transform.h \
- toasting.h indexing.h \
+ pg_tablesample_method.h pg_default_acl.h pg_seclabel.h pg_shseclabel.h \
+ pg_collation.h pg_range.h pg_transform.h toasting.h indexing.h \
)
# location of Catalog.pm
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 952cf204a0..65e329eab0 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1150,7 +1150,7 @@ acquire_sample_rows(Relation onerel, int elevel,
* Found a suitable tuple, so save it, replacing one
* old tuple at random
*/
- int k = (int) (targrows * sampler_random_fract());
+ int k = (int) (targrows * sampler_random_fract(rstate.randstate));
Assert(k >= 0 && k < targrows);
heap_freetuple(rows[k]);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index eeb8f19017..478771c6ba 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -731,6 +731,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_ValuesScan:
case T_CteScan:
case T_WorkTableScan:
+ case T_SampleScan:
*rels_used = bms_add_member(*rels_used,
((Scan *) plan)->scanrelid);
break;
@@ -967,6 +968,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
else
pname = sname;
break;
+ case T_SampleScan:
+ {
+ /*
+ * Fetch the tablesample method name from RTE.
+ *
+ * It would be nice to also show parameters, but since we
+ * support arbitrary expressions as parameter it might get
+ * quite messy.
+ */
+ RangeTblEntry *rte;
+ rte = rt_fetch(((SampleScan *) plan)->scanrelid, es->rtable);
+ custom_name = get_tablesample_method_name(rte->tablesample->tsmid);
+ pname = psprintf("Sample Scan (%s)", custom_name);
+ }
+ break;
case T_Material:
pname = sname = "Materialize";
break;
@@ -1089,6 +1105,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
if (((Scan *) plan)->scanrelid > 0)
ExplainScanTarget((Scan *) plan, es);
break;
+ case T_SampleScan:
+ ExplainScanTarget((Scan *) plan, es);
+ break;
case T_IndexScan:
{
IndexScan *indexscan = (IndexScan *) plan;
@@ -1339,6 +1358,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_CteScan:
case T_WorkTableScan:
case T_SubqueryScan:
+ case T_SampleScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
show_instrumentation_count("Rows Removed by Filter", 1,
@@ -2238,6 +2258,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
case T_TidScan:
case T_ForeignScan:
case T_CustomScan:
+ case T_SampleScan:
case T_ModifyTable:
/* Assert it's on a real relation */
Assert(rte->rtekind == RTE_RELATION);
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index bc5d373d68..08cba6fa2b 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -21,7 +21,7 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \
nodeLimit.o nodeLockRows.o \
nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \
nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \
- nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
+ nodeSamplescan.o nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
nodeValuesscan.o nodeCtescan.o nodeWorktablescan.o \
nodeGroup.o nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o \
nodeForeignscan.o nodeWindowAgg.o tstoreReceiver.o spi.o
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 6ebad2f03f..4948a265cb 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -39,6 +39,7 @@
#include "executor/nodeNestloop.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
+#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
@@ -155,6 +156,10 @@ ExecReScan(PlanState *node)
ExecReScanSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ ExecReScanSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
ExecReScanIndexScan((IndexScanState *) node);
break;
@@ -480,6 +485,9 @@ ExecSupportsBackwardScan(Plan *node)
}
return false;
+ case T_SampleScan:
+ return false;
+
case T_Material:
case T_Sort:
/* these don't evaluate tlist */
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index d87be963a9..bcd287f874 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -261,6 +261,7 @@ search_plan_tree(PlanState *node, Oid table_oid)
* Relation scan nodes can all be treated alike
*/
case T_SeqScanState:
+ case T_SampleScanState:
case T_IndexScanState:
case T_IndexOnlyScanState:
case T_BitmapHeapScanState:
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 9892499fb7..03c2febc3e 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -102,6 +102,7 @@
#include "executor/nodeNestloop.h"
#include "executor/nodeRecursiveunion.h"
#include "executor/nodeResult.h"
+#include "executor/nodeSamplescan.h"
#include "executor/nodeSeqscan.h"
#include "executor/nodeSetOp.h"
#include "executor/nodeSort.h"
@@ -190,6 +191,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_SampleScan:
+ result = (PlanState *) ExecInitSampleScan((SampleScan *) node,
+ estate, eflags);
+ break;
+
case T_IndexScan:
result = (PlanState *) ExecInitIndexScan((IndexScan *) node,
estate, eflags);
@@ -406,6 +412,10 @@ ExecProcNode(PlanState *node)
result = ExecSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ result = ExecSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
result = ExecIndexScan((IndexScanState *) node);
break;
@@ -644,6 +654,10 @@ ExecEndNode(PlanState *node)
ExecEndSeqScan((SeqScanState *) node);
break;
+ case T_SampleScanState:
+ ExecEndSampleScan((SampleScanState *) node);
+ break;
+
case T_IndexScanState:
ExecEndIndexScan((IndexScanState *) node);
break;
diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c
new file mode 100644
index 0000000000..fc89d1dca0
--- /dev/null
+++ b/src/backend/executor/nodeSamplescan.c
@@ -0,0 +1,256 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSamplescan.c
+ * Support routines for sample scans of relations (table sampling).
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeSamplescan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tablesample.h"
+#include "executor/executor.h"
+#include "executor/nodeSamplescan.h"
+#include "miscadmin.h"
+#include "parser/parsetree.h"
+#include "pgstat.h"
+#include "storage/bufmgr.h"
+#include "storage/predicate.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/tqual.h"
+
+static void InitScanRelation(SampleScanState *node, EState *estate,
+ int eflags, TableSampleClause *tablesample);
+static TupleTableSlot *SampleNext(SampleScanState *node);
+
+
+/* ----------------------------------------------------------------
+ * Scan Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * SampleNext
+ *
+ * This is a workhorse for ExecSampleScan
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+SampleNext(SampleScanState *node)
+{
+ TupleTableSlot *slot;
+ TableSampleDesc *tsdesc;
+ HeapTuple tuple;
+
+ /*
+ * get information from the scan state
+ */
+ slot = node->ss.ss_ScanTupleSlot;
+ tsdesc = node->tsdesc;
+
+ tuple = tablesample_getnext(tsdesc);
+
+ if (tuple)
+ ExecStoreTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ tsdesc->heapScan->rs_cbuf, /* buffer associated with this tuple */
+ false); /* don't pfree this pointer */
+ else
+ ExecClearTuple(slot);
+
+ return slot;
+}
+
+/*
+ * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
+{
+ /* No need to recheck for SampleScan */
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * ExecSampleScan(node)
+ *
+ * Scans the relation using the sampling method and returns
+ * the next qualifying tuple.
+ * We call the ExecScan() routine and pass it the appropriate
+ * access method functions.
+ * ----------------------------------------------------------------
+ */
+TupleTableSlot *
+ExecSampleScan(SampleScanState *node)
+{
+ return ExecScan((ScanState *) node,
+ (ExecScanAccessMtd) SampleNext,
+ (ExecScanRecheckMtd) SampleRecheck);
+}
+
+/* ----------------------------------------------------------------
+ * InitScanRelation
+ *
+ * Set up to access the scan relation.
+ * ----------------------------------------------------------------
+ */
+static void
+InitScanRelation(SampleScanState *node, EState *estate, int eflags,
+ TableSampleClause *tablesample)
+{
+ Relation currentRelation;
+
+ /*
+ * get the relation object id from the relid'th entry in the range table,
+ * open that relation and acquire appropriate lock on it.
+ */
+ currentRelation = ExecOpenScanRelation(estate,
+ ((SampleScan *) node->ss.ps.plan)->scanrelid,
+ eflags);
+
+ node->ss.ss_currentRelation = currentRelation;
+
+ /*
+ * Even though we aren't going to do a conventional seqscan, it is useful
+ * to create a HeapScanDesc --- many of the fields in it are usable.
+ */
+ node->ss.ss_currentScanDesc =
+ heap_beginscan_sampling(currentRelation, estate->es_snapshot, 0, NULL,
+ tablesample->tsmseqscan,
+ tablesample->tsmpagemode);
+
+ /* and report the scan tuple slot's rowtype */
+ ExecAssignScanType(&node->ss, RelationGetDescr(currentRelation));
+}
+
+
+/* ----------------------------------------------------------------
+ * ExecInitSampleScan
+ * ----------------------------------------------------------------
+ */
+SampleScanState *
+ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
+{
+ SampleScanState *scanstate;
+ RangeTblEntry *rte = rt_fetch(node->scanrelid,
+ estate->es_range_table);
+
+ Assert(outerPlan(node) == NULL);
+ Assert(innerPlan(node) == NULL);
+ Assert(rte->tablesample != NULL);
+
+ /*
+ * create state structure
+ */
+ scanstate = makeNode(SampleScanState);
+ scanstate->ss.ps.plan = (Plan *) node;
+ scanstate->ss.ps.state = estate;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &scanstate->ss.ps);
+
+ /*
+ * initialize child expressions
+ */
+ scanstate->ss.ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->plan.targetlist,
+ (PlanState *) scanstate);
+ scanstate->ss.ps.qual = (List *)
+ ExecInitExpr((Expr *) node->plan.qual,
+ (PlanState *) scanstate);
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
+ ExecInitScanTupleSlot(estate, &scanstate->ss);
+
+ /*
+ * initialize scan relation
+ */
+ InitScanRelation(scanstate, estate, eflags, rte->tablesample);
+
+ scanstate->ss.ps.ps_TupFromTlist = false;
+
+ /*
+ * Initialize result tuple type and projection info.
+ */
+ ExecAssignResultTypeFromTL(&scanstate->ss.ps);
+ ExecAssignScanProjectionInfo(&scanstate->ss);
+
+ scanstate->tsdesc = tablesample_init(scanstate, rte->tablesample);
+
+ return scanstate;
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndSampleScan
+ *
+ * frees any storage allocated through C routines.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndSampleScan(SampleScanState *node)
+{
+ /*
+ * Tell sampling function that we finished the scan.
+ */
+ tablesample_end(node->tsdesc);
+
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->ss.ps);
+
+ /*
+ * clean out the tuple table
+ */
+ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
+
+ /*
+ * close heap scan
+ */
+ heap_endscan(node->ss.ss_currentScanDesc);
+
+ /*
+ * close the heap relation.
+ */
+ ExecCloseScanRelation(node->ss.ss_currentRelation);
+}
+
+/* ----------------------------------------------------------------
+ * Join Support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------------------------------------------------------
+ * ExecReScanSampleScan
+ *
+ * Rescans the relation.
+ *
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanSampleScan(SampleScanState *node)
+{
+ heap_rescan(node->ss.ss_currentScanDesc, NULL);
+
+ /*
+ * Tell sampling function to reset its state for rescan.
+ */
+ tablesample_reset(node->tsdesc);
+
+ ExecScanReScan(&node->ss);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 25839eed94..bdc7e61935 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -641,6 +641,22 @@ _copyCustomScan(const CustomScan *from)
return newnode;
}
+/*
+ * _copySampleScan
+ */
+static SampleScan *
+_copySampleScan(const SampleScan *from)
+{
+ SampleScan *newnode = makeNode(SampleScan);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ return newnode;
+}
+
/*
* CopyJoinFields
*
@@ -2063,6 +2079,7 @@ _copyRangeTblEntry(const RangeTblEntry *from)
COPY_SCALAR_FIELD(rtekind);
COPY_SCALAR_FIELD(relid);
COPY_SCALAR_FIELD(relkind);
+ COPY_NODE_FIELD(tablesample);
COPY_NODE_FIELD(subquery);
COPY_SCALAR_FIELD(security_barrier);
COPY_SCALAR_FIELD(jointype);
@@ -2224,6 +2241,40 @@ _copyCommonTableExpr(const CommonTableExpr *from)
return newnode;
}
+static RangeTableSample *
+_copyRangeTableSample(const RangeTableSample *from)
+{
+ RangeTableSample *newnode = makeNode(RangeTableSample);
+
+ COPY_NODE_FIELD(relation);
+ COPY_STRING_FIELD(method);
+ COPY_NODE_FIELD(repeatable);
+ COPY_NODE_FIELD(args);
+
+ return newnode;
+}
+
+static TableSampleClause *
+_copyTableSampleClause(const TableSampleClause *from)
+{
+ TableSampleClause *newnode = makeNode(TableSampleClause);
+
+ COPY_SCALAR_FIELD(tsmid);
+ COPY_SCALAR_FIELD(tsmseqscan);
+ COPY_SCALAR_FIELD(tsmpagemode);
+ COPY_SCALAR_FIELD(tsminit);
+ COPY_SCALAR_FIELD(tsmnextblock);
+ COPY_SCALAR_FIELD(tsmnexttuple);
+ COPY_SCALAR_FIELD(tsmexaminetuple);
+ COPY_SCALAR_FIELD(tsmend);
+ COPY_SCALAR_FIELD(tsmreset);
+ COPY_SCALAR_FIELD(tsmcost);
+ COPY_NODE_FIELD(repeatable);
+ COPY_NODE_FIELD(args);
+
+ return newnode;
+}
+
static A_Expr *
_copyAExpr(const A_Expr *from)
{
@@ -4179,6 +4230,9 @@ copyObject(const void *from)
case T_CustomScan:
retval = _copyCustomScan(from);
break;
+ case T_SampleScan:
+ retval = _copySampleScan(from);
+ break;
case T_Join:
retval = _copyJoin(from);
break;
@@ -4842,6 +4896,12 @@ copyObject(const void *from)
case T_CommonTableExpr:
retval = _copyCommonTableExpr(from);
break;
+ case T_RangeTableSample:
+ retval = _copyRangeTableSample(from);
+ break;
+ case T_TableSampleClause:
+ retval = _copyTableSampleClause(from);
+ break;
case T_FuncWithArgs:
retval = _copyFuncWithArgs(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index c4b3615caf..d483221fb7 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2359,6 +2359,7 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b)
COMPARE_SCALAR_FIELD(rtekind);
COMPARE_SCALAR_FIELD(relid);
COMPARE_SCALAR_FIELD(relkind);
+ COMPARE_NODE_FIELD(tablesample);
COMPARE_NODE_FIELD(subquery);
COMPARE_SCALAR_FIELD(security_barrier);
COMPARE_SCALAR_FIELD(jointype);
@@ -2502,6 +2503,36 @@ _equalCommonTableExpr(const CommonTableExpr *a, const CommonTableExpr *b)
return true;
}
+static bool
+_equalRangeTableSample(const RangeTableSample *a, const RangeTableSample *b)
+{
+ COMPARE_NODE_FIELD(relation);
+ COMPARE_STRING_FIELD(method);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_NODE_FIELD(args);
+
+ return true;
+}
+
+static bool
+_equalTableSampleClause(const TableSampleClause *a, const TableSampleClause *b)
+{
+ COMPARE_SCALAR_FIELD(tsmid);
+ COMPARE_SCALAR_FIELD(tsmseqscan);
+ COMPARE_SCALAR_FIELD(tsmpagemode);
+ COMPARE_SCALAR_FIELD(tsminit);
+ COMPARE_SCALAR_FIELD(tsmnextblock);
+ COMPARE_SCALAR_FIELD(tsmnexttuple);
+ COMPARE_SCALAR_FIELD(tsmexaminetuple);
+ COMPARE_SCALAR_FIELD(tsmend);
+ COMPARE_SCALAR_FIELD(tsmreset);
+ COMPARE_SCALAR_FIELD(tsmcost);
+ COMPARE_NODE_FIELD(repeatable);
+ COMPARE_NODE_FIELD(args);
+
+ return true;
+}
+
static bool
_equalXmlSerialize(const XmlSerialize *a, const XmlSerialize *b)
{
@@ -3236,6 +3267,12 @@ equal(const void *a, const void *b)
case T_CommonTableExpr:
retval = _equalCommonTableExpr(a, b);
break;
+ case T_RangeTableSample:
+ retval = _equalRangeTableSample(a, b);
+ break;
+ case T_TableSampleClause:
+ retval = _equalTableSampleClause(a, b);
+ break;
case T_FuncWithArgs:
retval = _equalFuncWithArgs(a, b);
break;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index eac0215923..42d62d32d9 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2058,6 +2058,14 @@ range_table_walker(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
+ if (rte->tablesample)
+ {
+ if (walker(rte->tablesample->args, context))
+ return true;
+ if (walker(rte->tablesample->repeatable, context))
+ return true;
+ }
+ break;
case RTE_CTE:
/* nothing to do */
break;
@@ -2813,6 +2821,14 @@ range_table_mutator(List *rtable,
switch (rte->rtekind)
{
case RTE_RELATION:
+ if (rte->tablesample)
+ {
+ MUTATE(rte->tablesample->args, rte->tablesample->args,
+ List *);
+ MUTATE(rte->tablesample->repeatable,
+ rte->tablesample->repeatable, Node *);
+ }
+ break;
case RTE_CTE:
/* we don't bother to copy eref, aliases, etc; OK? */
break;
@@ -3309,6 +3325,18 @@ raw_expression_tree_walker(Node *node,
break;
case T_CommonTableExpr:
return walker(((CommonTableExpr *) node)->ctequery, context);
+ case T_RangeTableSample:
+ {
+ RangeTableSample *rts = (RangeTableSample *) node;
+
+ if (walker(rts->relation, context))
+ return true;
+ if (walker(rts->repeatable, context))
+ return true;
+ if (walker(rts->args, context))
+ return true;
+ }
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) nodeTag(node));
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index fe868b889d..7918553da0 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -591,6 +591,14 @@ _outCustomScan(StringInfo str, const CustomScan *node)
node->methods->TextOutCustomScan(str, node);
}
+static void
+_outSampleScan(StringInfo str, const SampleScan *node)
+{
+ WRITE_NODE_TYPE("SAMPLESCAN");
+
+ _outScanInfo(str, (const Scan *) node);
+}
+
static void
_outJoin(StringInfo str, const Join *node)
{
@@ -2444,6 +2452,36 @@ _outCommonTableExpr(StringInfo str, const CommonTableExpr *node)
WRITE_NODE_FIELD(ctecolcollations);
}
+static void
+_outRangeTableSample(StringInfo str, const RangeTableSample *node)
+{
+ WRITE_NODE_TYPE("RANGETABLESAMPLE");
+
+ WRITE_NODE_FIELD(relation);
+ WRITE_STRING_FIELD(method);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_NODE_FIELD(args);
+}
+
+static void
+_outTableSampleClause(StringInfo str, const TableSampleClause *node)
+{
+ WRITE_NODE_TYPE("TABLESAMPLECLAUSE");
+
+ WRITE_OID_FIELD(tsmid);
+ WRITE_BOOL_FIELD(tsmseqscan);
+ WRITE_BOOL_FIELD(tsmpagemode);
+ WRITE_OID_FIELD(tsminit);
+ WRITE_OID_FIELD(tsmnextblock);
+ WRITE_OID_FIELD(tsmnexttuple);
+ WRITE_OID_FIELD(tsmexaminetuple);
+ WRITE_OID_FIELD(tsmend);
+ WRITE_OID_FIELD(tsmreset);
+ WRITE_OID_FIELD(tsmcost);
+ WRITE_NODE_FIELD(repeatable);
+ WRITE_NODE_FIELD(args);
+}
+
static void
_outSetOperationStmt(StringInfo str, const SetOperationStmt *node)
{
@@ -2474,6 +2512,7 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node)
case RTE_RELATION:
WRITE_OID_FIELD(relid);
WRITE_CHAR_FIELD(relkind);
+ WRITE_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
WRITE_NODE_FIELD(subquery);
@@ -2973,6 +3012,9 @@ _outNode(StringInfo str, const void *obj)
case T_CustomScan:
_outCustomScan(str, obj);
break;
+ case T_SampleScan:
+ _outSampleScan(str, obj);
+ break;
case T_Join:
_outJoin(str, obj);
break;
@@ -3319,6 +3361,12 @@ _outNode(StringInfo str, const void *obj)
case T_CommonTableExpr:
_outCommonTableExpr(str, obj);
break;
+ case T_RangeTableSample:
+ _outRangeTableSample(str, obj);
+ break;
+ case T_TableSampleClause:
+ _outTableSampleClause(str, obj);
+ break;
case T_SetOperationStmt:
_outSetOperationStmt(str, obj);
break;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 8136306e1e..c8fb894a75 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -351,6 +351,46 @@ _readCommonTableExpr(void)
READ_DONE();
}
+/*
+ * _readRangeTableSample
+ */
+static RangeTableSample *
+_readRangeTableSample(void)
+{
+ READ_LOCALS(RangeTableSample);
+
+ READ_NODE_FIELD(relation);
+ READ_STRING_FIELD(method);
+ READ_NODE_FIELD(repeatable);
+ READ_NODE_FIELD(args);
+
+ READ_DONE();
+}
+
+/*
+ * _readTableSampleClause
+ */
+static TableSampleClause *
+_readTableSampleClause(void)
+{
+ READ_LOCALS(TableSampleClause);
+
+ READ_OID_FIELD(tsmid);
+ READ_BOOL_FIELD(tsmseqscan);
+ READ_BOOL_FIELD(tsmpagemode);
+ READ_OID_FIELD(tsminit);
+ READ_OID_FIELD(tsmnextblock);
+ READ_OID_FIELD(tsmnexttuple);
+ READ_OID_FIELD(tsmexaminetuple);
+ READ_OID_FIELD(tsmend);
+ READ_OID_FIELD(tsmreset);
+ READ_OID_FIELD(tsmcost);
+ READ_NODE_FIELD(repeatable);
+ READ_NODE_FIELD(args);
+
+ READ_DONE();
+}
+
/*
* _readSetOperationStmt
*/
@@ -1255,6 +1295,7 @@ _readRangeTblEntry(void)
case RTE_RELATION:
READ_OID_FIELD(relid);
READ_CHAR_FIELD(relkind);
+ READ_NODE_FIELD(tablesample);
break;
case RTE_SUBQUERY:
READ_NODE_FIELD(subquery);
@@ -1351,6 +1392,10 @@ parseNodeString(void)
return_value = _readRowMarkClause();
else if (MATCH("COMMONTABLEEXPR", 15))
return_value = _readCommonTableExpr();
+ else if (MATCH("RANGETABLESAMPLE", 16))
+ return_value = _readRangeTableSample();
+ else if (MATCH("TABLESAMPLECLAUSE", 17))
+ return_value = _readTableSampleClause();
else if (MATCH("SETOPERATIONSTMT", 16))
return_value = _readSetOperationStmt();
else if (MATCH("ALIAS", 5))
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 9caca94f64..4cd1bf65e7 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -71,6 +71,10 @@ static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -265,6 +269,11 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_size(root, rel, rte);
}
+ else if (rte->tablesample != NULL)
+ {
+ /* Sampled relation */
+ set_tablesample_rel_size(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -332,6 +341,11 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
/* Foreign table */
set_foreign_pathlist(root, rel, rte);
}
+ else if (rte->tablesample != NULL)
+ {
+ /* Build sample scan on relation */
+ set_tablesample_rel_pathlist(root, rel, rte);
+ }
else
{
/* Plain relation */
@@ -417,6 +431,41 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
create_tidscan_paths(root, rel);
}
+/*
+ * set_tablesample_rel_size
+ * Set size estimates for a sampled relation.
+ */
+static void
+set_tablesample_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Mark rel with estimated output rows, width, etc */
+ set_baserel_size_estimates(root, rel);
+}
+
+/*
+ * set_tablesample_rel_pathlist
+ * Build access paths for a sampled relation
+ *
+ * There is only one possible path - sampling scan
+ */
+static void
+set_tablesample_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ Relids required_outer;
+ Path *path;
+
+ /*
+ * We don't support pushing join clauses into the quals of a seqscan, but
+ * it could still have required parameterization due to LATERAL refs in
+ * its tlist.
+ */
+ required_outer = rel->lateral_relids;
+
+ /* We only do sample scan if it was requested */
+ path = create_samplescan_path(root, rel, required_outer);
+ rel->pathlist = list_make1(path);
+}
+
/*
* set_foreign_size
* Set size estimates for a foreign table RTE
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1a0d358c5f..c2b2b7622a 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -219,6 +219,73 @@ cost_seqscan(Path *path, PlannerInfo *root,
path->total_cost = startup_cost + run_cost;
}
+/*
+ * cost_samplescan
+ * Determines and returns the cost of scanning a relation using sampling.
+ *
+ * From planner/optimizer perspective, we don't care all that much about cost
+ * itself since there is always only one scan path to consider when sampling
+ * scan is present, but number of rows estimation is still important.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ double spc_seq_page_cost,
+ spc_random_page_cost,
+ spc_page_cost;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ BlockNumber pages;
+ double tuples;
+ RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+ TableSampleClause *tablesample = rte->tablesample;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (path->param_info)
+ path->rows = path->param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Call the sampling method's costing function. */
+ OidFunctionCall6(tablesample->tsmcost, PointerGetDatum(root),
+ PointerGetDatum(path), PointerGetDatum(baserel),
+ PointerGetDatum(tablesample->args),
+ PointerGetDatum(&pages), PointerGetDatum(&tuples));
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+
+ spc_page_cost = tablesample->tsmseqscan ? spc_seq_page_cost :
+ spc_random_page_cost;
+
+ /*
+ * disk costs
+ */
+ run_cost += spc_page_cost * pages;
+
+ /* CPU costs */
+ get_restriction_qual_cost(root, baserel, path->param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ run_cost += cpu_per_tuple * tuples;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
/*
* cost_index
* Determines and returns the cost of scanning a relation using an index.
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 783e34b4fb..c6095167e8 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -59,6 +59,8 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path
static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path);
static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path,
List *tlist, List *scan_clauses);
+static SampleScan *create_samplescan_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses);
static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path,
List *tlist, List *scan_clauses, bool indexonly);
static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root,
@@ -101,6 +103,7 @@ static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_path_costsize(Plan *dest, Path *src);
static void copy_plan_costsize(Plan *dest, Plan *src);
static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
+static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid);
static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
Oid indexid, List *indexqual, List *indexqualorig,
List *indexorderby, List *indexorderbyorig, Oid *indexorderbyops,
@@ -229,6 +232,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path)
switch (best_path->pathtype)
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -344,6 +348,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path)
scan_clauses);
break;
+ case T_SampleScan:
+ plan = (Plan *) create_samplescan_plan(root,
+ best_path,
+ tlist,
+ scan_clauses);
+ break;
+
case T_IndexScan:
plan = (Plan *) create_indexscan_plan(root,
(IndexPath *) best_path,
@@ -547,6 +558,7 @@ disuse_physical_tlist(PlannerInfo *root, Plan *plan, Path *path)
switch (path->pathtype)
{
case T_SeqScan:
+ case T_SampleScan:
case T_IndexScan:
case T_IndexOnlyScan:
case T_BitmapHeapScan:
@@ -1133,6 +1145,45 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
return scan_plan;
}
+/*
+ * create_samplescan_plan
+ * Returns a samplecan plan for the base relation scanned by 'best_path'
+ * with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ */
+static SampleScan *
+create_samplescan_plan(PlannerInfo *root, Path *best_path,
+ List *tlist, List *scan_clauses)
+{
+ SampleScan *scan_plan;
+ Index scan_relid = best_path->parent->relid;
+
+ /* it should be a base rel with tablesample clause... */
+ Assert(scan_relid > 0);
+ Assert(best_path->parent->rtekind == RTE_RELATION);
+ Assert(best_path->pathtype == T_SampleScan);
+
+ /* Sort clauses into best execution order */
+ scan_clauses = order_qual_clauses(root, scan_clauses);
+
+ /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
+ scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+ /* Replace any outer-relation variables with nestloop params */
+ if (best_path->param_info)
+ {
+ scan_clauses = (List *)
+ replace_nestloop_params(root, (Node *) scan_clauses);
+ }
+
+ scan_plan = make_samplescan(tlist,
+ scan_clauses,
+ scan_relid);
+
+ copy_path_costsize(&scan_plan->plan, best_path);
+
+ return scan_plan;
+}
+
/*
* create_indexscan_plan
* Returns an indexscan plan for the base relation scanned by 'best_path'
@@ -3378,6 +3429,24 @@ make_seqscan(List *qptlist,
return node;
}
+static SampleScan *
+make_samplescan(List *qptlist,
+ List *qpqual,
+ Index scanrelid)
+{
+ SampleScan *node = makeNode(SampleScan);
+ Plan *plan = &node->plan;
+
+ /* cost should be inserted by caller */
+ plan->targetlist = qptlist;
+ plan->qual = qpqual;
+ plan->lefttree = NULL;
+ plan->righttree = NULL;
+ node->scanrelid = scanrelid;
+
+ return node;
+}
+
static IndexScan *
make_indexscan(List *qptlist,
List *qpqual,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8de57c8e6b..9ba10516bb 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -60,6 +60,7 @@ planner_hook_type planner_hook = NULL;
#define EXPRKIND_LIMIT 6
#define EXPRKIND_APPINFO 7
#define EXPRKIND_PHV 8
+#define EXPRKIND_TABLESAMPLE 9
/* Passthrough data for standard_qp_callback */
typedef struct
@@ -486,7 +487,19 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
int kind;
- if (rte->rtekind == RTE_SUBQUERY)
+ if (rte->rtekind == RTE_RELATION)
+ {
+ if (rte->tablesample)
+ {
+ rte->tablesample->args = (List *)
+ preprocess_expression(root, (Node *) rte->tablesample->args,
+ EXPRKIND_TABLESAMPLE);
+ rte->tablesample->repeatable = (Node *)
+ preprocess_expression(root, rte->tablesample->repeatable,
+ EXPRKIND_TABLESAMPLE);
+ }
+ }
+ else if (rte->rtekind == RTE_SUBQUERY)
{
/*
* We don't want to do all preprocessing yet on the subquery's
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 517409d28a..73b1988836 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -451,6 +451,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
fix_scan_list(root, splan->plan.qual, rtoffset);
}
break;
+ case T_SampleScan:
+ {
+ SampleScan *splan = (SampleScan *) plan;
+
+ splan->scanrelid += rtoffset;
+ splan->plan.targetlist =
+ fix_scan_list(root, splan->plan.targetlist, rtoffset);
+ splan->plan.qual =
+ fix_scan_list(root, splan->plan.qual, rtoffset);
+ }
+ break;
case T_IndexScan:
{
IndexScan *splan = (IndexScan *) plan;
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index afccee53ac..2f7f5c0df0 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2167,6 +2167,7 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
break;
case T_SeqScan:
+ case T_SampleScan:
context.paramids = bms_add_members(context.paramids, scan_params);
break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index faca30b322..ea7a47bdf4 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -705,6 +705,26 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
return pathnode;
}
+/*
+ * create_samplescan_path
+ * Like seqscan but uses sampling function while scanning.
+ */
+Path *
+create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_SampleScan;
+ pathnode->parent = rel;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = NIL; /* samplescan has unordered result */
+
+ cost_samplescan(pathnode, root, rel);
+
+ return pathnode;
+}
+
/*
* create_index_path
* Creates a path node for an index scan.
@@ -1778,6 +1798,8 @@ reparameterize_path(PlannerInfo *root, Path *path,
case T_SubqueryScan:
return create_subqueryscan_path(root, rel, path->pathkeys,
required_outer);
+ case T_SampleScan:
+ return (Path *) create_samplescan_path(root, rel, required_outer);
default:
break;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 2dce87879e..1439783068 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -454,6 +454,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type relation_expr
%type relation_expr_opt_alias
%type target_el single_set_clause set_target insert_column_item
+%type relation_expr_tablesample tablesample_clause opt_repeatable_clause
%type generic_option_name
%type generic_option_arg
@@ -625,8 +626,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING
SYMMETRIC SYSID SYSTEM_P
- TABLE TABLES TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN TIME TIMESTAMP
- TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
+ TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN
+ TIME TIMESTAMP TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P
TRUNCATE TRUSTED TYPE_P TYPES_P
UNBOUNDED UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNLOGGED
@@ -10386,6 +10387,10 @@ table_ref: relation_expr opt_alias_clause
$1->alias = $2;
$$ = (Node *) $1;
}
+ | relation_expr_tablesample
+ {
+ $$ = (Node *) $1;
+ }
| func_table func_alias_clause
{
RangeFunction *n = (RangeFunction *) $1;
@@ -10711,6 +10716,32 @@ relation_expr_opt_alias: relation_expr %prec UMINUS
}
;
+
+relation_expr_tablesample: relation_expr opt_alias_clause tablesample_clause
+ {
+ RangeTableSample *n = (RangeTableSample *) $3;
+ n->relation = $1;
+ n->relation->alias = $2;
+ $$ = (Node *) n;
+ }
+ ;
+
+tablesample_clause:
+ TABLESAMPLE ColId '(' expr_list ')' opt_repeatable_clause
+ {
+ RangeTableSample *n = makeNode(RangeTableSample);
+ n->method = $2;
+ n->args = $4;
+ n->repeatable = $6;
+ $$ = (Node *) n;
+ }
+ ;
+
+opt_repeatable_clause:
+ REPEATABLE '(' a_expr ')' { $$ = (Node *) $3; }
+ | /*EMPTY*/ { $$ = NULL; }
+ ;
+
/*
* func_table represents a function invocation in a FROM list. It can be
* a plain function call, like "foo(...)", or a ROWS FROM expression with
@@ -13804,6 +13835,7 @@ type_func_name_keyword:
| OVERLAPS
| RIGHT
| SIMILAR
+ | TABLESAMPLE
| VERBOSE
;
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 73c505ed85..6b1bbe57d0 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -17,6 +17,7 @@
#include "access/heapam.h"
#include "catalog/catalog.h"
+#include "access/htup_details.h"
#include "catalog/heap.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_type.h"
@@ -31,6 +32,7 @@
#include "parser/parse_coerce.h"
#include "parser/parse_collate.h"
#include "parser/parse_expr.h"
+#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
@@ -39,6 +41,7 @@
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/syscache.h"
/* Convenience macro for the most common makeNamespaceItem() case */
@@ -419,6 +422,39 @@ transformJoinOnClause(ParseState *pstate, JoinExpr *j, List *namespace)
return result;
}
+static RangeTblEntry *
+transformTableSampleEntry(ParseState *pstate, RangeTableSample *rv)
+{
+ RangeTblEntry *rte = NULL;
+ CommonTableExpr *cte = NULL;
+ TableSampleClause *tablesample = NULL;
+
+ /* if relation has an unqualified name, it might be a CTE reference */
+ if (!rv->relation->schemaname)
+ {
+ Index levelsup;
+ cte = scanNameSpaceForCTE(pstate, rv->relation->relname, &levelsup);
+ }
+
+ /* We first need to build a range table entry */
+ if (!cte)
+ rte = transformTableEntry(pstate, rv->relation);
+
+ if (!rte ||
+ (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("TABLESAMPLE clause can only be used on tables and materialized views"),
+ parser_errposition(pstate, rv->relation->location)));
+
+ tablesample = ParseTableSample(pstate, rv->method, rv->repeatable,
+ rv->args, rv->relation->location);
+ rte->tablesample = tablesample;
+
+ return rte;
+}
+
/*
* transformTableEntry --- transform a RangeVar (simple relation reference)
*/
@@ -1127,6 +1163,26 @@ transformFromClauseItem(ParseState *pstate, Node *n,
return (Node *) j;
}
+ else if (IsA(n, RangeTableSample))
+ {
+ /* Tablesample reference */
+ RangeTableSample *rv = (RangeTableSample *) n;
+ RangeTblRef *rtr;
+ RangeTblEntry *rte = NULL;
+ int rtindex;
+
+ rte = transformTableSampleEntry(pstate, rv);
+
+ /* assume new rte is at end */
+ rtindex = list_length(pstate->p_rtable);
+ Assert(rte == rt_fetch(rtindex, pstate->p_rtable));
+ *top_rte = rte;
+ *top_rti = rtindex;
+ *namespace = list_make1(makeDefaultNSItem(rte));
+ rtr = makeNode(RangeTblRef);
+ rtr->rtindex = rtindex;
+ return (Node *) rtr;
+ }
else
elog(ERROR, "unrecognized node type: %d", (int) nodeTag(n));
return NULL; /* can't get here, keep compiler quiet */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index f7affebf84..fa50f92d8d 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -18,6 +18,7 @@
#include "catalog/pg_aggregate.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
+#include "catalog/pg_tablesample_method.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "nodes/makefuncs.h"
@@ -26,6 +27,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
+#include "parser/parse_expr.h"
#include "parser/parse_relation.h"
#include "parser/parse_target.h"
#include "parser/parse_type.h"
@@ -767,6 +769,147 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs,
}
+/*
+ * ParseTableSample
+ *
+ * Parse TABLESAMPLE clause and process the arguments
+ */
+TableSampleClause *
+ParseTableSample(ParseState *pstate, char *samplemethod, Node *repeatable,
+ List *sampleargs, int location)
+{
+ HeapTuple tuple;
+ Form_pg_tablesample_method tsm;
+ Form_pg_proc procform;
+ TableSampleClause *tablesample;
+ List *fargs;
+ ListCell *larg;
+ int nargs, initnargs;
+ Oid init_arg_types[FUNC_MAX_ARGS];
+
+ /* Load the tablesample method */
+ tuple = SearchSysCache1(TABLESAMPLEMETHODNAME, PointerGetDatum(samplemethod));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablesample method \"%s\" does not exist",
+ samplemethod),
+ parser_errposition(pstate, location)));
+
+ tablesample = makeNode(TableSampleClause);
+ tablesample->tsmid = HeapTupleGetOid(tuple);
+
+ tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
+
+ tablesample->tsmseqscan = tsm->tsmseqscan;
+ tablesample->tsmpagemode = tsm->tsmpagemode;
+ tablesample->tsminit = tsm->tsminit;
+ tablesample->tsmnextblock = tsm->tsmnextblock;
+ tablesample->tsmnexttuple = tsm->tsmnexttuple;
+ tablesample->tsmexaminetuple = tsm->tsmexaminetuple;
+ tablesample->tsmend = tsm->tsmend;
+ tablesample->tsmreset = tsm->tsmreset;
+ tablesample->tsmcost = tsm->tsmcost;
+
+ ReleaseSysCache(tuple);
+
+ /* Validate the parameters against init function definition. */
+ tuple = SearchSysCache1(PROCOID,
+ ObjectIdGetDatum(tablesample->tsminit));
+
+ if (!HeapTupleIsValid(tuple)) /* should not happen */
+ elog(ERROR, "cache lookup failed for function %u",
+ tablesample->tsminit);
+
+ procform = (Form_pg_proc) GETSTRUCT(tuple);
+ initnargs = procform->pronargs;
+ Assert(initnargs >= 3);
+
+ /*
+ * First parameter is used to pass the SampleScanState, second is
+ * seed (REPEATABLE), skip the processing for them here, just assert
+ * that the types are correct.
+ */
+ Assert(procform->proargtypes.values[0] == INTERNALOID);
+ Assert(procform->proargtypes.values[1] == INT4OID);
+ initnargs -= 2;
+ memcpy(init_arg_types, procform->proargtypes.values + 2,
+ initnargs * sizeof(Oid));
+
+ /* Now we are done with the catalog */
+ ReleaseSysCache(tuple);
+
+ /* Process repeatable (seed) */
+ if (repeatable != NULL)
+ {
+ Node *arg = repeatable;
+
+ if (arg && IsA(arg, A_Const))
+ {
+ A_Const *con = (A_Const *) arg;
+
+ if (con->val.type == T_Null)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("REPEATABLE clause must be NOT NULL numeric value"),
+ parser_errposition(pstate, con->location)));
+
+ }
+
+ arg = transformExpr(pstate, arg, EXPR_KIND_FROM_FUNCTION);
+ arg = coerce_to_specific_type(pstate, arg, INT4OID, "REPEATABLE");
+ tablesample->repeatable = arg;
+ }
+ else
+ tablesample->repeatable = NULL;
+
+ /* Check user provided expected number of arguments. */
+ if (list_length(sampleargs) != initnargs)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg_plural("tablesample method \"%s\" expects %d argument got %d",
+ "tablesample method \"%s\" expects %d arguments got %d",
+ initnargs,
+ samplemethod,
+ initnargs, list_length(sampleargs)),
+ parser_errposition(pstate, location)));
+
+ /* Transform the arguments, typecasting them as needed. */
+ fargs = NIL;
+ nargs = 0;
+ foreach(larg, sampleargs)
+ {
+ Node *inarg = (Node *) lfirst(larg);
+ Node *arg = transformExpr(pstate, inarg, EXPR_KIND_FROM_FUNCTION);
+ Oid argtype = exprType(arg);
+
+ if (argtype != init_arg_types[nargs])
+ {
+ if (!can_coerce_type(1, &argtype, &init_arg_types[nargs],
+ COERCION_IMPLICIT))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong parameter %d for tablesample method \"%s\"",
+ nargs + 1, samplemethod),
+ errdetail("Expected type %s got %s.",
+ format_type_be(init_arg_types[nargs]),
+ format_type_be(argtype)),
+ parser_errposition(pstate, exprLocation(inarg))));
+
+ arg = coerce_type(pstate, arg, argtype, init_arg_types[nargs], -1,
+ COERCION_IMPLICIT, COERCE_IMPLICIT_CAST, -1);
+ }
+
+ fargs = lappend(fargs, arg);
+ nargs++;
+ }
+
+ /* Pass the arguments down */
+ tablesample->args = fargs;
+
+ return tablesample;
+}
+
/* func_match_argtypes()
*
* Given a list of candidate functions (having the right name and number
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 39302a410b..e27afd1a3e 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -2209,6 +2209,9 @@ view_query_is_auto_updatable(Query *viewquery, bool check_cols)
base_rte->relkind != RELKIND_VIEW))
return gettext_noop("Views that do not select from a single table or view are not automatically updatable.");
+ if (base_rte->tablesample)
+ return gettext_noop("Views containing TABLESAMPLE are not automatically updatable.");
+
/*
* Check that the view has at least one updatable column. This is required
* for INSERT/UPDATE but not for DELETE.
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 903e80aea3..298eebf5e6 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -32,6 +32,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@@ -345,6 +346,8 @@ static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags);
static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
int prettyFlags, int wrapColumn);
+static void get_tablesample_def(TableSampleClause *tablesample,
+ deparse_context *context);
static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
TupleDesc resultDesc,
int prettyFlags, int wrapColumn, int startIndent);
@@ -4220,6 +4223,50 @@ make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
heap_close(ev_relation, AccessShareLock);
}
+/* ----------
+ * get_tablesample_def - Convert TableSampleClause back to SQL
+ * ----------
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ HeapTuple tuple;
+ Form_pg_tablesample_method tsm;
+ char *tsmname;
+ int nargs;
+ ListCell *l;
+
+ /* Load the tablesample method */
+ tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tablesample->tsmid));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("cache lookup failed for tablesample method %u",
+ tablesample->tsmid)));
+
+ tsm = (Form_pg_tablesample_method) GETSTRUCT(tuple);
+ tsmname = NameStr(tsm->tsmname);
+ appendStringInfo(buf, " TABLESAMPLE %s (", quote_identifier(tsmname));
+
+ ReleaseSysCache(tuple);
+
+ nargs = 0;
+ foreach(l, tablesample->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfirst(l), context, true);
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (tablesample->repeatable != NULL)
+ {
+ appendStringInfoString(buf, " REPEATABLE (");
+ get_rule_expr(tablesample->repeatable, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+}
/* ----------
* get_query_def - Parse back one query parsetree
@@ -8529,6 +8576,9 @@ get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
only_marker(rte),
generate_relation_name(rte->relid,
context->namespaces));
+
+ if (rte->tablesample)
+ get_tablesample_def(rte->tablesample, context);
break;
case RTE_SUBQUERY:
/* Subquery RTE */
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 1dc293297d..f259751e15 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -32,6 +32,7 @@
#include "catalog/pg_range.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_transform.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -2996,3 +2997,29 @@ get_range_subtype(Oid rangeOid)
else
return InvalidOid;
}
+
+/* ---------- PG_TABLESAMPLE_METHOD CACHE ---------- */
+
+/*
+ * get_tablesample_method_name - given a tablesample method OID,
+ * look up the name or NULL if not found
+ */
+char *
+get_tablesample_method_name(Oid tsmid)
+{
+ HeapTuple tuple;
+
+ tuple = SearchSysCache1(TABLESAMPLEMETHODOID, ObjectIdGetDatum(tsmid));
+ if (HeapTupleIsValid(tuple))
+ {
+ Form_pg_tablesample_method tup =
+ (Form_pg_tablesample_method) GETSTRUCT(tuple);
+ char *result;
+
+ result = pstrdup(NameStr(tup->tsmname));
+ ReleaseSysCache(tuple);
+ return result;
+ }
+ else
+ return NULL;
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index f58e1cebf2..7def1be32a 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -56,6 +56,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_tablesample_method.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
#include "catalog/pg_ts_config.h"
@@ -666,6 +667,28 @@ static const struct cachedesc cacheinfo[] = {
},
128
},
+ {TableSampleMethodRelationId, /* TABLESAMPLEMETHODNAME */
+ TableSampleMethodNameIndexId,
+ 1,
+ {
+ Anum_pg_tablesample_method_tsmname,
+ 0,
+ 0,
+ 0,
+ },
+ 2
+ },
+ {TableSampleMethodRelationId, /* TABLESAMPLEMETHODOID */
+ TableSampleMethodOidIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0,
+ },
+ 2
+ },
{TableSpaceRelationId, /* TABLESPACEOID */
TablespaceOidIndexId,
1,
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 1eeabaf158..9becc63bf8 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -46,6 +46,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
bs->n = samplesize;
bs->t = 0; /* blocks scanned so far */
bs->m = 0; /* blocks selected so far */
+
+ sampler_random_init_state(randseed, bs->randstate);
}
bool
@@ -92,7 +94,7 @@ BlockSampler_Next(BlockSampler bs)
* less than k, which means that we cannot fail to select enough blocks.
*----------
*/
- V = sampler_random_fract();
+ V = sampler_random_fract(bs->randstate);
p = 1.0 - (double) k / (double) K;
while (V < p)
{
@@ -126,8 +128,14 @@ BlockSampler_Next(BlockSampler bs)
void
reservoir_init_selection_state(ReservoirState rs, int n)
{
+ /*
+ * Reservoir sampling is not used anywhere where it would need to return
+ * repeatable results so we can initialize it randomly.
+ */
+ sampler_random_init_state(random(), rs->randstate);
+
/* Initial value of W (for use when Algorithm Z is first applied) */
- *rs = exp(-log(sampler_random_fract()) / n);
+ rs->W = exp(-log(sampler_random_fract(rs->randstate)) / n);
}
double
@@ -142,7 +150,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
double V,
quot;
- V = sampler_random_fract(); /* Generate V */
+ V = sampler_random_fract(rs->randstate); /* Generate V */
S = 0;
t += 1;
/* Note: "num" in Vitter's code is always equal to t - n */
@@ -158,7 +166,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
else
{
/* Now apply Algorithm Z */
- double W = *rs;
+ double W = rs->W;
double term = t - (double) n + 1;
for (;;)
@@ -174,7 +182,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
tmp;
/* Generate U and X */
- U = sampler_random_fract();
+ U = sampler_random_fract(rs->randstate);
X = t * (W - 1.0);
S = floor(X); /* S is tentatively set to floor(X) */
/* Test if U <= h(S)/cg(X) in the manner of (6.3) */
@@ -203,11 +211,11 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
y *= numer / denom;
denom -= 1;
}
- W = exp(-log(sampler_random_fract()) / n); /* Generate W in advance */
+ W = exp(-log(sampler_random_fract(rs->randstate)) / n); /* Generate W in advance */
if (exp(log(y) / n) <= (t + X) / t)
break;
}
- *rs = W;
+ rs->W = W;
}
return S;
}
@@ -217,10 +225,17 @@ reservoir_get_next_S(ReservoirState rs, double t, int n)
* Random number generator used by sampling
*----------
*/
+void
+sampler_random_init_state(long seed, SamplerRandomState randstate)
+{
+ randstate[0] = RAND48_SEED_0;
+ randstate[1] = (unsigned short) seed;
+ randstate[2] = (unsigned short) (seed >> 16);
+}
/* Select a random value R uniformly distributed in (0 - 1) */
double
-sampler_random_fract()
+sampler_random_fract(SamplerRandomState randstate)
{
- return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+ return pg_erand48(randstate);
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 49c8ca4d66..eec7c95b21 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -114,8 +114,12 @@ extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
bool allow_strat, bool allow_sync);
extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
int nkeys, ScanKey key);
+extern HeapScanDesc heap_beginscan_sampling(Relation relation,
+ Snapshot snapshot, int nkeys, ScanKey key,
+ bool allow_strat, bool allow_pagemode);
extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk,
BlockNumber endBlk);
+extern void heapgetpage(HeapScanDesc scan, BlockNumber page);
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
extern void heap_endscan(HeapScanDesc scan);
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 5a0d724aca..1b9b299395 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -29,6 +29,7 @@ typedef struct HeapScanDescData
int rs_nkeys; /* number of scan keys */
ScanKey rs_key; /* array of scan key descriptors */
bool rs_bitmapscan; /* true if this is really a bitmap scan */
+ bool rs_samplescan; /* true if this is really a sample scan */
bool rs_pageatatime; /* verify visibility page-at-a-time? */
bool rs_allow_strat; /* allow or disallow use of access strategy */
bool rs_allow_sync; /* allow or disallow use of syncscan */
diff --git a/src/include/access/tablesample.h b/src/include/access/tablesample.h
new file mode 100644
index 0000000000..222fa8d556
--- /dev/null
+++ b/src/include/access/tablesample.h
@@ -0,0 +1,60 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablesample.h
+ * Public header file for TABLESAMPLE clause interface
+ *
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/tablesample.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TABLESAMPLE_H
+#define TABLESAMPLE_H
+
+#include "access/relscan.h"
+#include "executor/executor.h"
+
+typedef struct TableSampleDesc {
+ HeapScanDesc heapScan;
+ TupleDesc tupDesc; /* Mostly useful for tsmexaminetuple */
+
+ void *tsmdata; /* private method data */
+
+ /* These point to he function of the TABLESAMPLE Method. */
+ FmgrInfo tsminit;
+ FmgrInfo tsmnextblock;
+ FmgrInfo tsmnexttuple;
+ FmgrInfo tsmexaminetuple;
+ FmgrInfo tsmreset;
+ FmgrInfo tsmend;
+} TableSampleDesc;
+
+
+extern TableSampleDesc *tablesample_init(SampleScanState *scanstate,
+ TableSampleClause *tablesample);
+extern HeapTuple tablesample_getnext(TableSampleDesc *desc);
+extern void tablesample_reset(TableSampleDesc *desc);
+extern void tablesample_end(TableSampleDesc *desc);
+extern HeapTuple tablesample_source_getnext(TableSampleDesc *desc);
+extern HeapTuple tablesample_source_gettup(TableSampleDesc *desc, ItemPointer tid,
+ bool *visible);
+
+extern Datum tsm_system_init(PG_FUNCTION_ARGS);
+extern Datum tsm_system_nextblock(PG_FUNCTION_ARGS);
+extern Datum tsm_system_nexttuple(PG_FUNCTION_ARGS);
+extern Datum tsm_system_end(PG_FUNCTION_ARGS);
+extern Datum tsm_system_reset(PG_FUNCTION_ARGS);
+extern Datum tsm_system_cost(PG_FUNCTION_ARGS);
+
+extern Datum tsm_bernoulli_init(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_nextblock(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_nexttuple(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_end(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_reset(PG_FUNCTION_ARGS);
+extern Datum tsm_bernoulli_cost(PG_FUNCTION_ARGS);
+
+
+#endif
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 71e0010a6f..f20567ed5f 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -316,6 +316,11 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roiident_index, 6001, on pg_replicati
DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication_origin using btree(roname varchar_pattern_ops));
#define ReplicationOriginNameIndex 6002
+DECLARE_UNIQUE_INDEX(pg_tablesample_method_name_index, 3331, on pg_tablesample_method using btree(tsmname name_ops));
+#define TableSampleMethodNameIndexId 3331
+DECLARE_UNIQUE_INDEX(pg_tablesample_method_oid_index, 3332, on pg_tablesample_method using btree(oid oid_ops));
+#define TableSampleMethodOidIndexId 3332
+
/* last step of initialization script: build the indexes declared above */
BUILD_INDICES
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 1c9edbc3b3..c2185bd9ad 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -5297,6 +5297,33 @@ DESCR("get an individual replication origin's replication progress");
DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ ));
DESCR("get progress for all replication origins");
+/* tablesample */
+DATA(insert OID = 3335 ( tsm_system_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_system_init _null_ _null_ _null_ ));
+DESCR("tsm_system_init(internal)");
+DATA(insert OID = 3336 ( tsm_system_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nextblock _null_ _null_ _null_ ));
+DESCR("tsm_system_nextblock(internal)");
+DATA(insert OID = 3337 ( tsm_system_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_system_nexttuple _null_ _null_ _null_ ));
+DESCR("tsm_system_nexttuple(internal)");
+DATA(insert OID = 3338 ( tsm_system_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_end _null_ _null_ _null_ ));
+DESCR("tsm_system_end(internal)");
+DATA(insert OID = 3339 ( tsm_system_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_system_reset _null_ _null_ _null_ ));
+DESCR("tsm_system_reset(internal)");
+DATA(insert OID = 3340 ( tsm_system_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_system_cost _null_ _null_ _null_ ));
+DESCR("tsm_system_cost(internal)");
+
+DATA(insert OID = 3341 ( tsm_bernoulli_init PGNSP PGUID 12 1 0 0 0 f f f f t f v 3 0 2278 "2281 23 700" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_init _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_init(internal)");
+DATA(insert OID = 3342 ( tsm_bernoulli_nextblock PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 23 "2281 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nextblock _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_nextblock(internal)");
+DATA(insert OID = 3343 ( tsm_bernoulli_nexttuple PGNSP PGUID 12 1 0 0 0 f f f f t f v 4 0 21 "2281 23 21 16" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_nexttuple _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_nexttuple(internal)");
+DATA(insert OID = 3344 ( tsm_bernoulli_end PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_end _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_end(internal)");
+DATA(insert OID = 3345 ( tsm_bernoulli_reset PGNSP PGUID 12 1 0 0 0 f f f f t f v 1 0 2278 "2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_reset _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_reset(internal)");
+DATA(insert OID = 3346 ( tsm_bernoulli_cost PGNSP PGUID 12 1 0 0 0 f f f f t f v 7 0 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ _null_ tsm_bernoulli_cost _null_ _null_ _null_ ));
+DESCR("tsm_bernoulli_cost(internal)");
+
/*
* Symbolic values for provolatile column: these indicate whether the result
* of a function is dependent *only* on the values of its explicit arguments,
diff --git a/src/include/catalog/pg_tablesample_method.h b/src/include/catalog/pg_tablesample_method.h
new file mode 100644
index 0000000000..968d1e696a
--- /dev/null
+++ b/src/include/catalog/pg_tablesample_method.h
@@ -0,0 +1,78 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_tablesample_method.h
+ * definition of the table scan methods.
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_tablesample_method.h
+ *
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_TABLESAMPLE_METHOD_H
+#define PG_TABLESAMPLE_METHOD_H
+
+#include "catalog/genbki.h"
+#include "catalog/objectaddress.h"
+
+/* ----------------
+ * pg_tablesample_method definition. cpp turns this into
+ * typedef struct FormData_pg_tablesample_method
+ * ----------------
+ */
+#define TableSampleMethodRelationId 3330
+
+CATALOG(pg_tablesample_method,3330)
+{
+ NameData tsmname; /* tablesample method name */
+ bool tsmseqscan; /* does this method scan whole table sequentially? */
+ bool tsmpagemode; /* does this method scan page at a time? */
+ regproc tsminit; /* init scan function */
+ regproc tsmnextblock; /* function returning next block to sample
+ or InvalidBlockOffset if finished */
+ regproc tsmnexttuple; /* function returning next tuple offset from current block
+ or InvalidOffsetNumber if end of the block was reacher */
+ regproc tsmexaminetuple; /* optional function which can examine tuple contents and
+ decide if tuple should be returned or not */
+ regproc tsmend; /* end scan function*/
+ regproc tsmreset; /* reset state - used by rescan */
+ regproc tsmcost; /* costing function */
+} FormData_pg_tablesample_method;
+
+/* ----------------
+ * Form_pg_tablesample_method corresponds to a pointer to a tuple with
+ * the format of pg_tablesample_method relation.
+ * ----------------
+ */
+typedef FormData_pg_tablesample_method *Form_pg_tablesample_method;
+
+/* ----------------
+ * compiler constants for pg_tablesample_method
+ * ----------------
+ */
+#define Natts_pg_tablesample_method 10
+#define Anum_pg_tablesample_method_tsmname 1
+#define Anum_pg_tablesample_method_tsmseqscan 2
+#define Anum_pg_tablesample_method_tsmpagemode 3
+#define Anum_pg_tablesample_method_tsminit 4
+#define Anum_pg_tablesample_method_tsmnextblock 5
+#define Anum_pg_tablesample_method_tsmnexttuple 6
+#define Anum_pg_tablesample_method_tsmexaminetuple 7
+#define Anum_pg_tablesample_method_tsmend 8
+#define Anum_pg_tablesample_method_tsmreset 9
+#define Anum_pg_tablesample_method_tsmcost 10
+
+/* ----------------
+ * initial contents of pg_tablesample_method
+ * ----------------
+ */
+
+DATA(insert OID = 3333 ( system false true tsm_system_init tsm_system_nextblock tsm_system_nexttuple - tsm_system_end tsm_system_reset tsm_system_cost ));
+DESCR("SYSTEM table sampling method");
+DATA(insert OID = 3334 ( bernoulli true false tsm_bernoulli_init tsm_bernoulli_nextblock tsm_bernoulli_nexttuple - tsm_bernoulli_end tsm_bernoulli_reset tsm_bernoulli_cost ));
+DESCR("BERNOULLI table sampling method");
+
+#endif /* PG_TABLESAMPLE_METHOD_H */
diff --git a/src/include/executor/nodeSamplescan.h b/src/include/executor/nodeSamplescan.h
new file mode 100644
index 0000000000..4b769daec8
--- /dev/null
+++ b/src/include/executor/nodeSamplescan.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeSamplescan.h
+ *
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/executor/nodeSamplescan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NODESAMPLESCAN_H
+#define NODESAMPLESCAN_H
+
+#include "nodes/execnodes.h"
+
+extern SampleScanState *ExecInitSampleScan(SampleScan *node, EState *estate, int eflags);
+extern TupleTableSlot *ExecSampleScan(SampleScanState *node);
+extern void ExecEndSampleScan(SampleScanState *node);
+extern void ExecReScanSampleScan(SampleScanState *node);
+
+#endif /* NODESAMPLESCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index fcfe1107f9..972368019a 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1237,6 +1237,15 @@ typedef struct ScanState
*/
typedef ScanState SeqScanState;
+/*
+ * SampleScan
+ */
+typedef struct SampleScanState
+{
+ ScanState ss;
+ struct TableSampleDesc *tsdesc;
+} SampleScanState;
+
/*
* These structs store information about index quals that don't have simple
* constant right-hand sides. See comments for ExecIndexBuildScanKeys()
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 768f413a45..8b275f6e26 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -61,6 +61,7 @@ typedef enum NodeTag
T_ValuesScan,
T_CteScan,
T_WorkTableScan,
+ T_SampleScan,
T_ForeignScan,
T_CustomScan,
T_Join,
@@ -97,6 +98,7 @@ typedef enum NodeTag
T_BitmapOrState,
T_ScanState,
T_SeqScanState,
+ T_SampleScanState,
T_IndexScanState,
T_IndexOnlyScanState,
T_BitmapIndexScanState,
@@ -419,6 +421,8 @@ typedef enum NodeTag
T_OnConflictClause,
T_CommonTableExpr,
T_RoleSpec,
+ T_RangeTableSample,
+ T_TableSampleClause,
/*
* TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 053f1b0121..6723f46f3f 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -335,6 +335,26 @@ typedef struct FuncCall
int location; /* token location, or -1 if unknown */
} FuncCall;
+/*
+ * TableSampleClause - a sampling method information
+ */
+typedef struct TableSampleClause
+{
+ NodeTag type;
+ Oid tsmid;
+ bool tsmseqscan;
+ bool tsmpagemode;
+ Oid tsminit;
+ Oid tsmnextblock;
+ Oid tsmnexttuple;
+ Oid tsmexaminetuple;
+ Oid tsmend;
+ Oid tsmreset;
+ Oid tsmcost;
+ Node *repeatable;
+ List *args;
+} TableSampleClause;
+
/*
* A_Star - '*' representing all columns of a table or compound field
*
@@ -535,6 +555,22 @@ typedef struct RangeFunction
* of function returning RECORD */
} RangeFunction;
+/*
+ * RangeTableSample - represents
- * ENABLE ROW LEVEL SECURITY.
+ * If polname is NULL, then this record is just indicating that ROW LEVEL
+ * SECURITY is enabled for the table. Dump as ALTER TABLE
ENABLE
+ * ROW LEVEL SECURITY.
*/
if (polinfo->polname == NULL)
{
@@ -3046,7 +3047,7 @@ binary_upgrade_set_type_oids_by_type_oid(Archive *fout,
appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_type oid\n");
appendPQExpBuffer(upgrade_buffer,
- "SELECT pg_catalog.binary_upgrade_set_next_pg_type_oid('%u'::pg_catalog.oid);\n\n",
+ "SELECT pg_catalog.binary_upgrade_set_next_pg_type_oid('%u'::pg_catalog.oid);\n\n",
pg_type_oid);
/* we only support old >= 8.3 for binary upgrades */
@@ -6597,7 +6598,7 @@ getTransforms(Archive *fout, int *numTransforms)
int ntups;
int i;
PQExpBuffer query;
- TransformInfo *transforminfo;
+ TransformInfo *transforminfo;
int i_tableoid;
int i_oid;
int i_trftype;
@@ -8462,7 +8463,7 @@ dumpExtension(Archive *fout, DumpOptions *dopt, ExtensionInfo *extinfo)
appendPQExpBuffer(q, "DROP EXTENSION IF EXISTS %s;\n", qextname);
appendPQExpBufferStr(q,
- "SELECT pg_catalog.binary_upgrade_create_empty_extension(");
+ "SELECT pg_catalog.binary_upgrade_create_empty_extension(");
appendStringLiteralAH(q, extinfo->dobj.name, fout);
appendPQExpBufferStr(q, ", ");
appendStringLiteralAH(q, extinfo->namespace, fout);
@@ -9367,7 +9368,7 @@ dumpDomain(Archive *fout, DumpOptions *dopt, TypeInfo *tyinfo)
for (i = 0; i < tyinfo->nDomChecks; i++)
{
ConstraintInfo *domcheck = &(tyinfo->domChecks[i]);
- PQExpBuffer labelq = createPQExpBuffer();
+ PQExpBuffer labelq = createPQExpBuffer();
appendPQExpBuffer(labelq, "CONSTRAINT %s ",
fmtId(domcheck->dobj.name));
@@ -10451,8 +10452,8 @@ dumpFunc(Archive *fout, DumpOptions *dopt, FuncInfo *finfo)
if (protrftypes != NULL && strcmp(protrftypes, "") != 0)
{
- Oid *typeids = palloc(FUNC_MAX_ARGS * sizeof(Oid));
- int i;
+ Oid *typeids = palloc(FUNC_MAX_ARGS * sizeof(Oid));
+ int i;
appendPQExpBufferStr(q, " TRANSFORM ");
parseOidArray(protrftypes, typeids, FUNC_MAX_ARGS);
@@ -10461,7 +10462,7 @@ dumpFunc(Archive *fout, DumpOptions *dopt, FuncInfo *finfo)
if (i != 0)
appendPQExpBufferStr(q, ", ");
appendPQExpBuffer(q, "FOR TYPE %s",
- getFormattedTypeName(fout, typeids[i], zeroAsNone));
+ getFormattedTypeName(fout, typeids[i], zeroAsNone));
}
}
@@ -10729,11 +10730,11 @@ dumpTransform(Archive *fout, DumpOptions *dopt, TransformInfo *transform)
lanname = get_language_name(fout, transform->trflang);
appendPQExpBuffer(delqry, "DROP TRANSFORM FOR %s LANGUAGE %s;\n",
- getFormattedTypeName(fout, transform->trftype, zeroAsNone),
+ getFormattedTypeName(fout, transform->trftype, zeroAsNone),
lanname);
appendPQExpBuffer(defqry, "CREATE TRANSFORM FOR %s LANGUAGE %s (",
- getFormattedTypeName(fout, transform->trftype, zeroAsNone),
+ getFormattedTypeName(fout, transform->trftype, zeroAsNone),
lanname);
if (!transform->trffromsql && !transform->trftosql)
@@ -10747,11 +10748,10 @@ dumpTransform(Archive *fout, DumpOptions *dopt, TransformInfo *transform)
/*
* Always qualify the function name, in case it is not in
- * pg_catalog schema (format_function_signature won't qualify
- * it).
+ * pg_catalog schema (format_function_signature won't qualify it).
*/
appendPQExpBuffer(defqry, "FROM SQL WITH FUNCTION %s.%s",
- fmtId(fromsqlFuncInfo->dobj.namespace->dobj.name), fsig);
+ fmtId(fromsqlFuncInfo->dobj.namespace->dobj.name), fsig);
free(fsig);
}
else
@@ -10769,11 +10769,10 @@ dumpTransform(Archive *fout, DumpOptions *dopt, TransformInfo *transform)
/*
* Always qualify the function name, in case it is not in
- * pg_catalog schema (format_function_signature won't qualify
- * it).
+ * pg_catalog schema (format_function_signature won't qualify it).
*/
appendPQExpBuffer(defqry, "TO SQL WITH FUNCTION %s.%s",
- fmtId(tosqlFuncInfo->dobj.namespace->dobj.name), fsig);
+ fmtId(tosqlFuncInfo->dobj.namespace->dobj.name), fsig);
free(fsig);
}
else
@@ -10783,7 +10782,7 @@ dumpTransform(Archive *fout, DumpOptions *dopt, TransformInfo *transform)
appendPQExpBuffer(defqry, ");\n");
appendPQExpBuffer(labelq, "TRANSFORM FOR %s LANGUAGE %s",
- getFormattedTypeName(fout, transform->trftype, zeroAsNone),
+ getFormattedTypeName(fout, transform->trftype, zeroAsNone),
lanname);
if (dopt->binary_upgrade)
@@ -14012,9 +14011,9 @@ dumpTableSchema(Archive *fout, DumpOptions *dopt, TableInfo *tbinfo)
* here, also updating their attlen/attalign values so that the
* dropped column can be skipped properly. (We do not bother with
* restoring the original attbyval setting.) Also, inheritance
- * relationships are set up by doing ALTER TABLE INHERIT rather than using
- * an INHERITS clause --- the latter would possibly mess up the column
- * order. That also means we have to take care about setting
+ * relationships are set up by doing ALTER TABLE INHERIT rather than
+ * using an INHERITS clause --- the latter would possibly mess up the
+ * column order. That also means we have to take care about setting
* attislocal correctly, plus fix up any inherited CHECK constraints.
* Analogously, we set up typed tables using ALTER TABLE / OF here.
*/
@@ -15473,28 +15472,28 @@ dumpRule(Archive *fout, DumpOptions *dopt, RuleInfo *rinfo)
*
* 1. Identify objects which are members of extensions
*
- * Generally speaking, this is to mark them as *not* being dumped, as most
- * extension objects are created by the single CREATE EXTENSION command.
- * The one exception is binary upgrades with pg_upgrade will still dump the
- * non-table objects.
+ * Generally speaking, this is to mark them as *not* being dumped, as most
+ * extension objects are created by the single CREATE EXTENSION command.
+ * The one exception is binary upgrades with pg_upgrade will still dump the
+ * non-table objects.
*
* 2. Identify and create dump records for extension configuration tables.
*
- * Extensions can mark tables as "configuration", which means that the user
- * is able and expected to modify those tables after the extension has been
- * loaded. For these tables, we dump out only the data- the structure is
- * expected to be handled at CREATE EXTENSION time, including any indexes or
- * foreign keys, which brings us to-
+ * Extensions can mark tables as "configuration", which means that the user
+ * is able and expected to modify those tables after the extension has been
+ * loaded. For these tables, we dump out only the data- the structure is
+ * expected to be handled at CREATE EXTENSION time, including any indexes or
+ * foreign keys, which brings us to-
*
* 3. Record FK dependencies between configuration tables.
*
- * Due to the FKs being created at CREATE EXTENSION time and therefore before
- * the data is loaded, we have to work out what the best order for reloading
- * the data is, to avoid FK violations when the tables are restored. This is
- * not perfect- we can't handle circular dependencies and if any exist they
- * will cause an invalid dump to be produced (though at least all of the data
- * is included for a user to manually restore). This is currently documented
- * but perhaps we can provide a better solution in the future.
+ * Due to the FKs being created at CREATE EXTENSION time and therefore before
+ * the data is loaded, we have to work out what the best order for reloading
+ * the data is, to avoid FK violations when the tables are restored. This is
+ * not perfect- we can't handle circular dependencies and if any exist they
+ * will cause an invalid dump to be produced (though at least all of the data
+ * is included for a user to manually restore). This is currently documented
+ * but perhaps we can provide a better solution in the future.
*/
void
getExtensionMembership(Archive *fout, DumpOptions *dopt, ExtensionInfo extinfo[],
@@ -15691,21 +15690,20 @@ getExtensionMembership(Archive *fout, DumpOptions *dopt, ExtensionInfo extinfo[]
}
/*
- * Now that all the TableInfoData objects have been created for all
- * the extensions, check their FK dependencies and register them to
- * try and dump the data out in an order which they can be restored
- * in.
+ * Now that all the TableInfoData objects have been created for all the
+ * extensions, check their FK dependencies and register them to try and
+ * dump the data out in an order which they can be restored in.
*
* Note that this is not a problem for user tables as their FKs are
* recreated after the data has been loaded.
*/
printfPQExpBuffer(query,
- "SELECT conrelid, confrelid "
- "FROM pg_constraint "
- "JOIN pg_depend ON (objid = confrelid) "
- "WHERE contype = 'f' "
- "AND refclassid = 'pg_extension'::regclass "
- "AND classid = 'pg_class'::regclass;");
+ "SELECT conrelid, confrelid "
+ "FROM pg_constraint "
+ "JOIN pg_depend ON (objid = confrelid) "
+ "WHERE contype = 'f' "
+ "AND refclassid = 'pg_extension'::regclass "
+ "AND classid = 'pg_class'::regclass;");
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
ntups = PQntuples(res);
@@ -15716,8 +15714,10 @@ getExtensionMembership(Archive *fout, DumpOptions *dopt, ExtensionInfo extinfo[]
/* Now get the dependencies and register them */
for (i = 0; i < ntups; i++)
{
- Oid conrelid, confrelid;
- TableInfo *reftable, *contable;
+ Oid conrelid,
+ confrelid;
+ TableInfo *reftable,
+ *contable;
conrelid = atooid(PQgetvalue(res, i, i_conrelid));
confrelid = atooid(PQgetvalue(res, i, i_confrelid));
@@ -15731,8 +15731,8 @@ getExtensionMembership(Archive *fout, DumpOptions *dopt, ExtensionInfo extinfo[]
continue;
/*
- * Make referencing TABLE_DATA object depend on the
- * referenced table's TABLE_DATA object.
+ * Make referencing TABLE_DATA object depend on the referenced table's
+ * TABLE_DATA object.
*/
addObjectDependency(&contable->dataObj->dobj,
reftable->dataObj->dobj.dumpId);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 4c796ad6a7..009dba5c9d 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -471,7 +471,7 @@ typedef struct _policyInfo
{
DumpableObject dobj;
TableInfo *poltable;
- char *polname; /* null indicates RLS is enabled on rel */
+ char *polname; /* null indicates RLS is enabled on rel */
char *polcmd;
char *polroles;
char *polqual;
diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c
index 2c72e19f2d..c6b9326cb1 100644
--- a/src/bin/pg_dump/pg_dumpall.c
+++ b/src/bin/pg_dump/pg_dumpall.c
@@ -1422,7 +1422,7 @@ dumpCreateDB(PGconn *conn)
{
appendPQExpBufferStr(buf, "-- For binary upgrade, set datfrozenxid and datminmxid.\n");
appendPQExpBuffer(buf, "UPDATE pg_catalog.pg_database "
- "SET datfrozenxid = '%u', datminmxid = '%u' "
+ "SET datfrozenxid = '%u', datminmxid = '%u' "
"WHERE datname = ",
dbfrozenxid, dbminmxid);
appendStringLiteralConn(buf, dbname, conn);
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 393d580154..6ffe795348 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -914,9 +914,9 @@ FindEndOfXLOG(void)
XLogSegNo segno;
/*
- * Note: We don't use XLogFromFileName here, because we want
- * to use the segment size from the control file, not the size
- * the pg_resetxlog binary was compiled with
+ * Note: We don't use XLogFromFileName here, because we want to
+ * use the segment size from the control file, not the size the
+ * pg_resetxlog binary was compiled with
*/
sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
segno = ((uint64) log) * segs_per_xlogid + seg;
diff --git a/src/bin/pg_rewind/RewindTest.pm b/src/bin/pg_rewind/RewindTest.pm
index 6ea2f871aa..5219ec967a 100644
--- a/src/bin/pg_rewind/RewindTest.pm
+++ b/src/bin/pg_rewind/RewindTest.pm
@@ -67,8 +67,8 @@ our @EXPORT = qw(
# for debugging purposes,
my $testroot = tempdir;
-our $test_master_datadir="$testroot/data_master";
-our $test_standby_datadir="$testroot/data_standby";
+our $test_master_datadir = "$testroot/data_master";
+our $test_standby_datadir = "$testroot/data_standby";
mkdir $testroot;
@@ -76,14 +76,14 @@ mkdir $testroot;
mkdir "regress_log";
# Define non-conflicting ports for both nodes.
-my $port_master=$ENV{PGPORT};
-my $port_standby=$port_master + 1;
+my $port_master = $ENV{PGPORT};
+my $port_standby = $port_master + 1;
my $log_path;
my $tempdir_short;
-my $connstr_master="port=$port_master";
-my $connstr_standby="port=$port_standby";
+my $connstr_master = "port=$port_master";
+my $connstr_standby = "port=$port_standby";
$ENV{PGDATABASE} = "postgres";
@@ -109,19 +109,25 @@ sub check_query
my ($stdout, $stderr);
# we want just the output, no formatting
- my $result = run ['psql', '-q', '-A', '-t', '--no-psqlrc',
- '-d', $connstr_master,
- '-c' , $query],
- '>', \$stdout, '2>', \$stderr;
+ my $result = run [
+ 'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
+ $connstr_master, '-c', $query ],
+ '>', \$stdout, '2>', \$stderr;
+
# We don't use ok() for the exit code and stderr, because we want this
# check to be just a single test.
- if (!$result) {
- fail ("$test_name: psql exit code");
- } elsif ($stderr ne '') {
+ if (!$result)
+ {
+ fail("$test_name: psql exit code");
+ }
+ elsif ($stderr ne '')
+ {
diag $stderr;
- fail ("$test_name: psql no stderr");
- } else {
- is ($stdout, $expected_stdout, "$test_name: query result matches");
+ fail("$test_name: psql no stderr");
+ }
+ else
+ {
+ is($stdout, $expected_stdout, "$test_name: query result matches");
}
}
@@ -131,12 +137,12 @@ sub poll_query_until
my ($query, $connstr) = @_;
my $max_attempts = 30;
- my $attempts = 0;
+ my $attempts = 0;
my ($stdout, $stderr);
while ($attempts < $max_attempts)
{
- my $cmd = ['psql', '-At', '-c', "$query", '-d', "$connstr" ];
+ my $cmd = [ 'psql', '-At', '-c', "$query", '-d', "$connstr" ];
my $result = run $cmd, '>', \$stdout, '2>', \$stderr;
chomp($stdout);
@@ -158,7 +164,7 @@ sub poll_query_until
sub append_to_file
{
- my($filename, $str) = @_;
+ my ($filename, $str) = @_;
open my $fh, ">>", $filename or die "could not open file $filename";
print $fh $str;
@@ -167,10 +173,10 @@ sub append_to_file
sub init_rewind_test
{
- my $testname = shift;
+ my $testname = shift;
my $test_mode = shift;
- $log_path="regress_log/pg_rewind_log_${testname}_${test_mode}";
+ $log_path = "regress_log/pg_rewind_log_${testname}_${test_mode}";
remove_tree $log_path;
}
@@ -184,7 +190,8 @@ sub setup_cluster
standard_initdb($test_master_datadir);
# Custom parameters for master's postgresql.conf
- append_to_file("$test_master_datadir/postgresql.conf", qq(
+ append_to_file(
+ "$test_master_datadir/postgresql.conf", qq(
wal_level = hot_standby
max_wal_senders = 2
wal_keep_segments = 20
@@ -197,38 +204,47 @@ max_connections = 10
));
# Accept replication connections on master
- append_to_file("$test_master_datadir/pg_hba.conf", qq(
+ append_to_file(
+ "$test_master_datadir/pg_hba.conf", qq(
local replication all trust
));
- system_or_bail("pg_ctl -w -D $test_master_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_master\" start >>$log_path 2>&1");
+ system_or_bail(
+"pg_ctl -w -D $test_master_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_master\" start >>$log_path 2>&1"
+ );
#### Now run the test-specific parts to initialize the master before setting
# up standby
- $ENV{PGHOST} = $tempdir_short;
+ $ENV{PGHOST} = $tempdir_short;
}
sub create_standby
{
+
# Set up standby with necessary parameter
remove_tree $test_standby_datadir;
# Base backup is taken with xlog files included
- system_or_bail("pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1");
- append_to_file("$test_standby_datadir/recovery.conf", qq(
+ system_or_bail(
+"pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1");
+ append_to_file(
+ "$test_standby_datadir/recovery.conf", qq(
primary_conninfo='$connstr_master application_name=rewind_standby'
standby_mode=on
recovery_target_timeline='latest'
));
# Start standby
- system_or_bail("pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1");
+ system_or_bail(
+"pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1"
+ );
# Wait until the standby has caught up with the primary, by polling
# pg_stat_replication.
- my $caughtup_query = "SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
+ my $caughtup_query =
+"SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
poll_query_until($caughtup_query, $connstr_master)
- or die "Timed out while waiting for standby to catch up";
+ or die "Timed out while waiting for standby to catch up";
}
sub promote_standby
@@ -239,9 +255,10 @@ sub promote_standby
# Now promote slave and insert some new data on master, this will put
# the master out-of-sync with the standby. Wait until the standby is
# out of recovery mode, and is ready to accept read-write connections.
- system_or_bail("pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1");
+ system_or_bail(
+ "pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1");
poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby)
- or die "Timed out while waiting for promotion of standby";
+ or die "Timed out while waiting for promotion of standby";
# Force a checkpoint after the promotion. pg_rewind looks at the control
# file todetermine what timeline the server is on, and that isn't updated
@@ -257,7 +274,8 @@ sub run_pg_rewind
my $test_mode = shift;
# Stop the master and be ready to perform the rewind
- system_or_bail("pg_ctl -w -D $test_master_datadir stop -m fast >>$log_path 2>&1");
+ system_or_bail(
+ "pg_ctl -w -D $test_master_datadir stop -m fast >>$log_path 2>&1");
# At this point, the rewind processing is ready to run.
# We now have a very simple scenario with a few diverged WAL record.
@@ -266,47 +284,67 @@ sub run_pg_rewind
# Keep a temporary postgresql.conf for master node or it would be
# overwritten during the rewind.
- copy("$test_master_datadir/postgresql.conf", "$testroot/master-postgresql.conf.tmp");
+ copy(
+ "$test_master_datadir/postgresql.conf",
+ "$testroot/master-postgresql.conf.tmp");
+
# Now run pg_rewind
if ($test_mode eq "local")
{
+
# Do rewind using a local pgdata as source
# Stop the master and be ready to perform the rewind
- system_or_bail("pg_ctl -w -D $test_standby_datadir stop -m fast >>$log_path 2>&1");
- my $result =
- run(['pg_rewind',
- "--debug",
- "--source-pgdata=$test_standby_datadir",
- "--target-pgdata=$test_master_datadir"],
- '>>', $log_path, '2>&1');
- ok ($result, 'pg_rewind local');
+ system_or_bail(
+ "pg_ctl -w -D $test_standby_datadir stop -m fast >>$log_path 2>&1"
+ );
+ my $result = run(
+ [ 'pg_rewind',
+ "--debug",
+ "--source-pgdata=$test_standby_datadir",
+ "--target-pgdata=$test_master_datadir" ],
+ '>>',
+ $log_path,
+ '2>&1');
+ ok($result, 'pg_rewind local');
}
elsif ($test_mode eq "remote")
{
+
# Do rewind using a remote connection as source
- my $result =
- run(['pg_rewind',
- "--source-server", "port=$port_standby dbname=postgres",
- "--target-pgdata=$test_master_datadir"],
- '>>', $log_path, '2>&1');
- ok ($result, 'pg_rewind remote');
- } else {
+ my $result = run(
+ [ 'pg_rewind',
+ "--source-server",
+ "port=$port_standby dbname=postgres",
+ "--target-pgdata=$test_master_datadir" ],
+ '>>',
+ $log_path,
+ '2>&1');
+ ok($result, 'pg_rewind remote');
+ }
+ else
+ {
+
# Cannot come here normally
die("Incorrect test mode specified");
}
# Now move back postgresql.conf with old settings
- move("$testroot/master-postgresql.conf.tmp", "$test_master_datadir/postgresql.conf");
+ move(
+ "$testroot/master-postgresql.conf.tmp",
+ "$test_master_datadir/postgresql.conf");
# Plug-in rewound node to the now-promoted standby node
- append_to_file("$test_master_datadir/recovery.conf", qq(
+ append_to_file(
+ "$test_master_datadir/recovery.conf", qq(
primary_conninfo='port=$port_standby'
standby_mode=on
recovery_target_timeline='latest'
));
# Restart the master to check that rewind went correctly
- system_or_bail("pg_ctl -w -D $test_master_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_master\" start >>$log_path 2>&1");
+ system_or_bail(
+"pg_ctl -w -D $test_master_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_master\" start >>$log_path 2>&1"
+ );
#### Now run the test-specific parts to check the result
}
@@ -316,11 +354,13 @@ sub clean_rewind_test
{
if ($test_master_datadir)
{
- system "pg_ctl -D $test_master_datadir -s -m immediate stop 2> /dev/null";
+ system
+ "pg_ctl -D $test_master_datadir -s -m immediate stop 2> /dev/null";
}
if ($test_standby_datadir)
{
- system "pg_ctl -D $test_standby_datadir -s -m immediate stop 2> /dev/null";
+ system
+ "pg_ctl -D $test_standby_datadir -s -m immediate stop 2> /dev/null";
}
}
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 1a56866fd4..cb2bf4d1a0 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -185,13 +185,13 @@ process_source_file(const char *path, file_type_t type, size_t newsize,
*
* If it's smaller in the target, it means that it has been
* truncated in the target, or enlarged in the source, or
- * both. If it was truncated in the target, we need to copy the
- * missing tail from the source system. If it was enlarged in
- * the source system, there will be WAL records in the source
- * system for the new blocks, so we wouldn't need to copy them
- * here. But we don't know which scenario we're dealing with,
- * and there's no harm in copying the missing blocks now, so
- * do it now.
+ * both. If it was truncated in the target, we need to copy
+ * the missing tail from the source system. If it was enlarged
+ * in the source system, there will be WAL records in the
+ * source system for the new blocks, so we wouldn't need to
+ * copy them here. But we don't know which scenario we're
+ * dealing with, and there's no harm in copying the missing
+ * blocks now, so do it now.
*
* If it's the same size, do nothing here. Any blocks modified
* in the target will be copied based on parsing the target
@@ -370,6 +370,7 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
break;
case FILE_ACTION_COPY_TAIL:
+
/*
* skip the modified block if it is part of the "tail" that
* we're copying anyway.
@@ -391,8 +392,8 @@ process_block_change(ForkNumber forknum, RelFileNode rnode, BlockNumber blkno)
/*
* If we don't have any record of this file in the file map, it means
* that it's a relation that doesn't exist in the source system, and
- * it was subsequently removed in the target system, too. We can safely
- * ignore it.
+ * it was subsequently removed in the target system, too. We can
+ * safely ignore it.
*/
}
}
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index 73113ecdeb..9943ec5f26 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -23,13 +23,13 @@
*/
typedef enum
{
- FILE_ACTION_CREATE, /* create local directory or symbolic link */
- FILE_ACTION_COPY, /* copy whole file, overwriting if exists */
- FILE_ACTION_COPY_TAIL, /* copy tail from 'oldsize' to 'newsize' */
- FILE_ACTION_NONE, /* no action (we might still copy modified blocks
- * based on the parsed WAL) */
- FILE_ACTION_TRUNCATE, /* truncate local file to 'newsize' bytes */
- FILE_ACTION_REMOVE /* remove local file / directory / symlink */
+ FILE_ACTION_CREATE, /* create local directory or symbolic link */
+ FILE_ACTION_COPY, /* copy whole file, overwriting if exists */
+ FILE_ACTION_COPY_TAIL, /* copy tail from 'oldsize' to 'newsize' */
+ FILE_ACTION_NONE, /* no action (we might still copy modified
+ * blocks based on the parsed WAL) */
+ FILE_ACTION_TRUNCATE, /* truncate local file to 'newsize' bytes */
+ FILE_ACTION_REMOVE /* remove local file / directory / symlink */
} file_action_t;
typedef enum
@@ -51,10 +51,10 @@ typedef struct file_entry_t
size_t newsize;
bool isrelfile; /* is it a relation data file? */
- datapagemap_t pagemap;
+ datapagemap_t pagemap;
/* for a symlink */
- char *link_target;
+ char *link_target;
struct file_entry_t *next;
} file_entry_t;
@@ -72,16 +72,16 @@ typedef struct filemap_t
/*
* After processing all the remote files, the entries in the linked list
* are moved to this array. After processing local files, too, all the
- * local entries are added to the array by filemap_finalize, and sorted
- * in the final order. After filemap_finalize, all the entries are in
- * the array, and the linked list is empty.
+ * local entries are added to the array by filemap_finalize, and sorted in
+ * the final order. After filemap_finalize, all the entries are in the
+ * array, and the linked list is empty.
*/
file_entry_t **array;
int narray; /* current length of array */
/*
- * Summary information. total_size is the total size of the source cluster,
- * and fetch_size is the number of bytes that needs to be copied.
+ * Summary information. total_size is the total size of the source
+ * cluster, and fetch_size is the number of bytes that needs to be copied.
*/
uint64 total_size;
uint64 fetch_size;
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 9c112701e5..fca771d8cb 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -236,7 +236,7 @@ SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
{
XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
uint32 targetPageOff;
- XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+ XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
XLByteToSeg(targetPagePtr, targetSegNo);
targetPageOff = targetPagePtr % XLogSegSize;
@@ -315,10 +315,10 @@ extractPageInfo(XLogReaderState *record)
/*
* New databases can be safely ignored. It won't be present in the
* source system, so it will be deleted. There's one corner-case,
- * though: if a new, different, database is also created in the
- * source system, we'll see that the files already exist and not copy
- * them. That's OK, though; WAL replay of creating the new database,
- * from the source systems's WAL, will re-copy the new database,
+ * though: if a new, different, database is also created in the source
+ * system, we'll see that the files already exist and not copy them.
+ * That's OK, though; WAL replay of creating the new database, from
+ * the source systems's WAL, will re-copy the new database,
* overwriting the database created in the target system.
*/
}
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index d3ae7674d7..8088be4fab 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -490,15 +490,15 @@ createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpo
"BACKUP METHOD: pg_rewind\n"
"BACKUP FROM: standby\n"
"START TIME: %s\n",
- /* omit LABEL: line */
- (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename,
+ /* omit LABEL: line */
+ (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename,
(uint32) (checkpointloc >> 32), (uint32) checkpointloc,
strfbuf);
if (len >= sizeof(buf))
- pg_fatal("backup label buffer too small\n"); /* shouldn't happen */
+ pg_fatal("backup label buffer too small\n"); /* shouldn't happen */
/* TODO: move old file out of the way, if any. */
- open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
+ open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
write_target_range(buf, 0, len);
}
diff --git a/src/bin/pg_rewind/t/001_basic.pl b/src/bin/pg_rewind/t/001_basic.pl
index a1d679f6b8..f60368bd30 100644
--- a/src/bin/pg_rewind/t/001_basic.pl
+++ b/src/bin/pg_rewind/t/001_basic.pl
@@ -32,8 +32,11 @@ sub run_test
# Insert additional data on master that will be replicated to standby
master_psql("INSERT INTO tbl1 values ('in master, before promotion')");
- master_psql("INSERT INTO trunc_tbl values ('in master, before promotion')");
- master_psql("INSERT INTO tail_tbl SELECT g, 'in master, before promotion: ' || g FROM generate_series(1, 10000) g");
+ master_psql(
+ "INSERT INTO trunc_tbl values ('in master, before promotion')");
+ master_psql(
+"INSERT INTO tail_tbl SELECT g, 'in master, before promotion: ' || g FROM generate_series(1, 10000) g"
+ );
master_psql('CHECKPOINT');
@@ -50,7 +53,9 @@ sub run_test
# Insert enough rows to trunc_tbl to extend the file. pg_rewind should
# truncate it back to the old size.
- master_psql("INSERT INTO trunc_tbl SELECT 'in master, after promotion: ' || g FROM generate_series(1, 10000) g");
+ master_psql(
+"INSERT INTO trunc_tbl SELECT 'in master, after promotion: ' || g FROM generate_series(1, 10000) g"
+ );
# Truncate tail_tbl. pg_rewind should copy back the truncated part
# (We cannot use an actual TRUNCATE command here, as that creates a
@@ -60,20 +65,23 @@ sub run_test
RewindTest::run_pg_rewind($test_mode);
- check_query('SELECT * FROM tbl1',
+ check_query(
+ 'SELECT * FROM tbl1',
qq(in master
in master, before promotion
in standby, after promotion
),
'table content');
- check_query('SELECT * FROM trunc_tbl',
+ check_query(
+ 'SELECT * FROM trunc_tbl',
qq(in master
in master, before promotion
),
'truncation');
- check_query('SELECT count(*) FROM tail_tbl',
+ check_query(
+ 'SELECT count(*) FROM tail_tbl',
qq(10001
),
'tail-copy');
diff --git a/src/bin/pg_rewind/t/002_databases.pl b/src/bin/pg_rewind/t/002_databases.pl
index be1e1948a7..7564fa98a5 100644
--- a/src/bin/pg_rewind/t/002_databases.pl
+++ b/src/bin/pg_rewind/t/002_databases.pl
@@ -25,20 +25,22 @@ sub run_test
# Create databases in the old master and the new promoted standby.
master_psql('CREATE DATABASE master_afterpromotion');
standby_psql('CREATE DATABASE standby_afterpromotion');
+
# The clusters are now diverged.
RewindTest::run_pg_rewind($test_mode);
# Check that the correct databases are present after pg_rewind.
- check_query('SELECT datname FROM pg_database',
- qq(template1
+ check_query(
+ 'SELECT datname FROM pg_database',
+ qq(template1
template0
postgres
inmaster
beforepromotion
standby_afterpromotion
),
- 'database names');
+ 'database names');
RewindTest::clean_rewind_test();
}
diff --git a/src/bin/pg_rewind/t/003_extrafiles.pl b/src/bin/pg_rewind/t/003_extrafiles.pl
index ed50659195..9a952685be 100644
--- a/src/bin/pg_rewind/t/003_extrafiles.pl
+++ b/src/bin/pg_rewind/t/003_extrafiles.pl
@@ -24,44 +24,58 @@ sub run_test
append_to_file "$test_master_datadir/tst_both_dir/both_file1", "in both1";
append_to_file "$test_master_datadir/tst_both_dir/both_file2", "in both2";
mkdir "$test_master_datadir/tst_both_dir/both_subdir/";
- append_to_file "$test_master_datadir/tst_both_dir/both_subdir/both_file3", "in both3";
+ append_to_file "$test_master_datadir/tst_both_dir/both_subdir/both_file3",
+ "in both3";
RewindTest::create_standby();
# Create different subdirs and files in master and standby
mkdir "$test_standby_datadir/tst_standby_dir";
- append_to_file "$test_standby_datadir/tst_standby_dir/standby_file1", "in standby1";
- append_to_file "$test_standby_datadir/tst_standby_dir/standby_file2", "in standby2";
+ append_to_file "$test_standby_datadir/tst_standby_dir/standby_file1",
+ "in standby1";
+ append_to_file "$test_standby_datadir/tst_standby_dir/standby_file2",
+ "in standby2";
mkdir "$test_standby_datadir/tst_standby_dir/standby_subdir/";
- append_to_file "$test_standby_datadir/tst_standby_dir/standby_subdir/standby_file3", "in standby3";
+ append_to_file
+ "$test_standby_datadir/tst_standby_dir/standby_subdir/standby_file3",
+ "in standby3";
mkdir "$test_master_datadir/tst_master_dir";
- append_to_file "$test_master_datadir/tst_master_dir/master_file1", "in master1";
- append_to_file "$test_master_datadir/tst_master_dir/master_file2", "in master2";
+ append_to_file "$test_master_datadir/tst_master_dir/master_file1",
+ "in master1";
+ append_to_file "$test_master_datadir/tst_master_dir/master_file2",
+ "in master2";
mkdir "$test_master_datadir/tst_master_dir/master_subdir/";
- append_to_file "$test_master_datadir/tst_master_dir/master_subdir/master_file3", "in master3";
+ append_to_file
+ "$test_master_datadir/tst_master_dir/master_subdir/master_file3",
+ "in master3";
RewindTest::promote_standby();
RewindTest::run_pg_rewind($test_mode);
# List files in the data directory after rewind.
my @paths;
- find(sub {push @paths, $File::Find::name if $File::Find::name =~ m/.*tst_.*/},
- $test_master_datadir);
+ find(
+ sub {
+ push @paths, $File::Find::name
+ if $File::Find::name =~ m/.*tst_.*/;
+ },
+ $test_master_datadir);
@paths = sort @paths;
- is_deeply(\@paths,
- ["$test_master_datadir/tst_both_dir",
- "$test_master_datadir/tst_both_dir/both_file1",
- "$test_master_datadir/tst_both_dir/both_file2",
- "$test_master_datadir/tst_both_dir/both_subdir",
- "$test_master_datadir/tst_both_dir/both_subdir/both_file3",
- "$test_master_datadir/tst_standby_dir",
- "$test_master_datadir/tst_standby_dir/standby_file1",
- "$test_master_datadir/tst_standby_dir/standby_file2",
- "$test_master_datadir/tst_standby_dir/standby_subdir",
- "$test_master_datadir/tst_standby_dir/standby_subdir/standby_file3"],
- "file lists match");
+ is_deeply(
+ \@paths,
+ [ "$test_master_datadir/tst_both_dir",
+ "$test_master_datadir/tst_both_dir/both_file1",
+ "$test_master_datadir/tst_both_dir/both_file2",
+ "$test_master_datadir/tst_both_dir/both_subdir",
+ "$test_master_datadir/tst_both_dir/both_subdir/both_file3",
+ "$test_master_datadir/tst_standby_dir",
+ "$test_master_datadir/tst_standby_dir/standby_file1",
+ "$test_master_datadir/tst_standby_dir/standby_file2",
+ "$test_master_datadir/tst_standby_dir/standby_subdir",
+"$test_master_datadir/tst_standby_dir/standby_subdir/standby_file3" ],
+ "file lists match");
RewindTest::clean_rewind_test();
}
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 99c66be7fb..5a91871c35 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -317,16 +317,16 @@ equivalent_locale(int category, const char *loca, const char *locb)
int lenb;
/*
- * If the names are equal, the locales are equivalent. Checking this
- * first avoids calling setlocale() in the common case that the names
- * are equal. That's a good thing, if setlocale() is buggy, for example.
+ * If the names are equal, the locales are equivalent. Checking this first
+ * avoids calling setlocale() in the common case that the names are equal.
+ * That's a good thing, if setlocale() is buggy, for example.
*/
if (pg_strcasecmp(loca, locb) == 0)
return true;
/*
- * Not identical. Canonicalize both names, remove the encoding parts,
- * and try again.
+ * Not identical. Canonicalize both names, remove the encoding parts, and
+ * try again.
*/
canona = get_canonical_locale_name(category, loca);
chara = strrchr(canona, '.');
@@ -512,7 +512,7 @@ create_script_for_old_cluster_deletion(char **deletion_script_file_name)
{
/* reproduce warning from CREATE TABLESPACE that is in the log */
pg_log(PG_WARNING,
- "\nWARNING: user-defined tablespace locations should not be inside the data directory, e.g. %s\n", old_tablespace_dir);
+ "\nWARNING: user-defined tablespace locations should not be inside the data directory, e.g. %s\n", old_tablespace_dir);
/* Unlink file in case it is left over from a previous run. */
unlink(*deletion_script_file_name);
@@ -611,8 +611,8 @@ check_is_install_user(ClusterInfo *cluster)
/*
* We only allow the install user in the new cluster (see comment below)
- * and we preserve pg_authid.oid, so this must be the install user in
- * the old cluster too.
+ * and we preserve pg_authid.oid, so this must be the install user in the
+ * old cluster too.
*/
if (PQntuples(res) != 1 ||
atooid(PQgetvalue(res, 0, 1)) != BOOTSTRAP_SUPERUSERID)
@@ -681,10 +681,13 @@ check_proper_datallowconn(ClusterInfo *cluster)
}
else
{
- /* avoid datallowconn == false databases from being skipped on restore */
+ /*
+ * avoid datallowconn == false databases from being skipped on
+ * restore
+ */
if (strcmp(datallowconn, "f") == 0)
pg_fatal("All non-template0 databases must allow connections, "
- "i.e. their pg_database.datallowconn must be true\n");
+ "i.e. their pg_database.datallowconn must be true\n");
}
}
@@ -873,7 +876,7 @@ check_for_reg_data_type_usage(ClusterInfo *cluster)
" 'pg_catalog.regconfig'::pg_catalog.regtype, "
" 'pg_catalog.regdictionary'::pg_catalog.regtype) AND "
" c.relnamespace = n.oid AND "
- " n.nspname NOT IN ('pg_catalog', 'information_schema')");
+ " n.nspname NOT IN ('pg_catalog', 'information_schema')");
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
@@ -964,7 +967,7 @@ check_for_jsonb_9_4_usage(ClusterInfo *cluster)
" c.relnamespace = n.oid AND "
/* exclude possible orphaned temp tables */
" n.nspname !~ '^pg_temp_' AND "
- " n.nspname NOT IN ('pg_catalog', 'information_schema')");
+ " n.nspname NOT IN ('pg_catalog', 'information_schema')");
ntups = PQntuples(res);
i_nspname = PQfnumber(res, "nspname");
@@ -999,7 +1002,7 @@ check_for_jsonb_9_4_usage(ClusterInfo *cluster)
{
pg_log(PG_REPORT, "fatal\n");
pg_fatal("Your installation contains one of the JSONB data types in user tables.\n"
- "The internal format of JSONB changed during 9.4 beta so this cluster cannot currently\n"
+ "The internal format of JSONB changed during 9.4 beta so this cluster cannot currently\n"
"be upgraded. You can remove the problem tables and restart the upgrade. A list\n"
"of the problem columns is in the file:\n"
" %s\n\n", output_path);
diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c
index 2c20e847ac..6d6f84d725 100644
--- a/src/bin/pg_upgrade/dump.c
+++ b/src/bin/pg_upgrade/dump.c
@@ -111,7 +111,7 @@ optionally_create_toast_tables(void)
"FROM pg_catalog.pg_class c, "
" pg_catalog.pg_namespace n "
"WHERE c.relnamespace = n.oid AND "
- " n.nspname NOT IN ('pg_catalog', 'information_schema') AND "
+ " n.nspname NOT IN ('pg_catalog', 'information_schema') AND "
"c.relkind IN ('r', 'm') AND "
"c.reltoastrelid = 0");
@@ -122,12 +122,12 @@ optionally_create_toast_tables(void)
{
/* enable auto-oid-numbered TOAST creation if needed */
PQclear(executeQueryOrDie(conn, "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%d'::pg_catalog.oid);",
- OPTIONALLY_CREATE_TOAST_OID));
+ OPTIONALLY_CREATE_TOAST_OID));
/* dummy command that also triggers check for required TOAST table */
PQclear(executeQueryOrDie(conn, "ALTER TABLE %s.%s RESET (binary_upgrade_dummy_option);",
- quote_identifier(PQgetvalue(res, rowno, i_nspname)),
- quote_identifier(PQgetvalue(res, rowno, i_relname))));
+ quote_identifier(PQgetvalue(res, rowno, i_nspname)),
+ quote_identifier(PQgetvalue(res, rowno, i_relname))));
}
PQclear(res);
diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c
index c0a5601209..e158c9ff8b 100644
--- a/src/bin/pg_upgrade/info.c
+++ b/src/bin/pg_upgrade/info.c
@@ -38,16 +38,16 @@ gen_db_file_maps(DbInfo *old_db, DbInfo *new_db,
int *nmaps, const char *old_pgdata, const char *new_pgdata)
{
FileNameMap *maps;
- int old_relnum, new_relnum;
+ int old_relnum,
+ new_relnum;
int num_maps = 0;
maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) *
old_db->rel_arr.nrels);
/*
- * The old database shouldn't have more relations than the new one.
- * We force the new cluster to have a TOAST table if the old table
- * had one.
+ * The old database shouldn't have more relations than the new one. We
+ * force the new cluster to have a TOAST table if the old table had one.
*/
if (old_db->rel_arr.nrels > new_db->rel_arr.nrels)
pg_fatal("old and new databases \"%s\" have a mismatched number of relations\n",
@@ -62,15 +62,15 @@ gen_db_file_maps(DbInfo *old_db, DbInfo *new_db,
/*
* It is possible that the new cluster has a TOAST table for a table
- * that didn't need one in the old cluster, e.g. 9.0 to 9.1 changed the
- * NUMERIC length computation. Therefore, if we have a TOAST table
- * in the new cluster that doesn't match, skip over it and continue
- * processing. It is possible this TOAST table used an OID that was
- * reserved in the old cluster, but we have no way of testing that,
- * and we would have already gotten an error at the new cluster schema
- * creation stage. Fortunately, since we only restore the OID counter
- * after schema restore, and restore in OID order via pg_dump, a
- * conflict would only happen if the new TOAST table had a very low
+ * that didn't need one in the old cluster, e.g. 9.0 to 9.1 changed
+ * the NUMERIC length computation. Therefore, if we have a TOAST
+ * table in the new cluster that doesn't match, skip over it and
+ * continue processing. It is possible this TOAST table used an OID
+ * that was reserved in the old cluster, but we have no way of testing
+ * that, and we would have already gotten an error at the new cluster
+ * schema creation stage. Fortunately, since we only restore the OID
+ * counter after schema restore, and restore in OID order via pg_dump,
+ * a conflict would only happen if the new TOAST table had a very low
* OID. However, TOAST tables created long after initial table
* creation can have any OID, particularly after OID wraparound.
*/
@@ -330,75 +330,77 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
*/
snprintf(query, sizeof(query),
- /* get regular heap */
- "WITH regular_heap (reloid) AS ( "
- " SELECT c.oid "
- " FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n "
- " ON c.relnamespace = n.oid "
- " LEFT OUTER JOIN pg_catalog.pg_index i "
- " ON c.oid = i.indexrelid "
- " WHERE relkind IN ('r', 'm', 'i', 'S') AND "
- /*
- * pg_dump only dumps valid indexes; testing indisready is necessary in
- * 9.2, and harmless in earlier/later versions.
- */
- " i.indisvalid IS DISTINCT FROM false AND "
- " i.indisready IS DISTINCT FROM false AND "
- /* exclude possible orphaned temp tables */
- " ((n.nspname !~ '^pg_temp_' AND "
- " n.nspname !~ '^pg_toast_temp_' AND "
- /* skip pg_toast because toast index have relkind == 'i', not 't' */
- " n.nspname NOT IN ('pg_catalog', 'information_schema', "
- " 'binary_upgrade', 'pg_toast') AND "
- " c.oid >= %u) OR "
- " (n.nspname = 'pg_catalog' AND "
- " relname IN ('pg_largeobject', 'pg_largeobject_loid_pn_index'%s) ))), "
- /*
- * We have to gather the TOAST tables in later steps because we
- * can't schema-qualify TOAST tables.
- */
- /* get TOAST heap */
- " toast_heap (reloid) AS ( "
- " SELECT reltoastrelid "
- " FROM regular_heap JOIN pg_catalog.pg_class c "
- " ON regular_heap.reloid = c.oid "
- " AND c.reltoastrelid != %u), "
- /* get indexes on regular and TOAST heap */
- " all_index (reloid) AS ( "
- " SELECT indexrelid "
- " FROM pg_index "
- " WHERE indisvalid "
- " AND indrelid IN (SELECT reltoastrelid "
- " FROM (SELECT reloid FROM regular_heap "
- " UNION ALL "
- " SELECT reloid FROM toast_heap) all_heap "
- " JOIN pg_catalog.pg_class c "
- " ON all_heap.reloid = c.oid "
- " AND c.reltoastrelid != %u)) "
- /* get all rels */
- "SELECT c.oid, n.nspname, c.relname, "
- " c.relfilenode, c.reltablespace, %s "
- "FROM (SELECT reloid FROM regular_heap "
- " UNION ALL "
- " SELECT reloid FROM toast_heap "
- " UNION ALL "
- " SELECT reloid FROM all_index) all_rels "
- " JOIN pg_catalog.pg_class c "
- " ON all_rels.reloid = c.oid "
- " JOIN pg_catalog.pg_namespace n "
- " ON c.relnamespace = n.oid "
- " LEFT OUTER JOIN pg_catalog.pg_tablespace t "
- " ON c.reltablespace = t.oid "
+ /* get regular heap */
+ "WITH regular_heap (reloid) AS ( "
+ " SELECT c.oid "
+ " FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n "
+ " ON c.relnamespace = n.oid "
+ " LEFT OUTER JOIN pg_catalog.pg_index i "
+ " ON c.oid = i.indexrelid "
+ " WHERE relkind IN ('r', 'm', 'i', 'S') AND "
+
+ /*
+ * pg_dump only dumps valid indexes; testing indisready is necessary in
+ * 9.2, and harmless in earlier/later versions.
+ */
+ " i.indisvalid IS DISTINCT FROM false AND "
+ " i.indisready IS DISTINCT FROM false AND "
+ /* exclude possible orphaned temp tables */
+ " ((n.nspname !~ '^pg_temp_' AND "
+ " n.nspname !~ '^pg_toast_temp_' AND "
+ /* skip pg_toast because toast index have relkind == 'i', not 't' */
+ " n.nspname NOT IN ('pg_catalog', 'information_schema', "
+ " 'binary_upgrade', 'pg_toast') AND "
+ " c.oid >= %u) OR "
+ " (n.nspname = 'pg_catalog' AND "
+ " relname IN ('pg_largeobject', 'pg_largeobject_loid_pn_index'%s) ))), "
+
+ /*
+ * We have to gather the TOAST tables in later steps because we can't
+ * schema-qualify TOAST tables.
+ */
+ /* get TOAST heap */
+ " toast_heap (reloid) AS ( "
+ " SELECT reltoastrelid "
+ " FROM regular_heap JOIN pg_catalog.pg_class c "
+ " ON regular_heap.reloid = c.oid "
+ " AND c.reltoastrelid != %u), "
+ /* get indexes on regular and TOAST heap */
+ " all_index (reloid) AS ( "
+ " SELECT indexrelid "
+ " FROM pg_index "
+ " WHERE indisvalid "
+ " AND indrelid IN (SELECT reltoastrelid "
+ " FROM (SELECT reloid FROM regular_heap "
+ " UNION ALL "
+ " SELECT reloid FROM toast_heap) all_heap "
+ " JOIN pg_catalog.pg_class c "
+ " ON all_heap.reloid = c.oid "
+ " AND c.reltoastrelid != %u)) "
+ /* get all rels */
+ "SELECT c.oid, n.nspname, c.relname, "
+ " c.relfilenode, c.reltablespace, %s "
+ "FROM (SELECT reloid FROM regular_heap "
+ " UNION ALL "
+ " SELECT reloid FROM toast_heap "
+ " UNION ALL "
+ " SELECT reloid FROM all_index) all_rels "
+ " JOIN pg_catalog.pg_class c "
+ " ON all_rels.reloid = c.oid "
+ " JOIN pg_catalog.pg_namespace n "
+ " ON c.relnamespace = n.oid "
+ " LEFT OUTER JOIN pg_catalog.pg_tablespace t "
+ " ON c.reltablespace = t.oid "
/* we preserve pg_class.oid so we sort by it to match old/new */
- "ORDER BY 1;",
- FirstNormalObjectId,
+ "ORDER BY 1;",
+ FirstNormalObjectId,
/* does pg_largeobject_metadata need to be migrated? */
- (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ?
- "" : ", 'pg_largeobject_metadata', 'pg_largeobject_metadata_oid_index'",
- InvalidOid, InvalidOid,
+ (GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ?
+ "" : ", 'pg_largeobject_metadata', 'pg_largeobject_metadata_oid_index'",
+ InvalidOid, InvalidOid,
/* 9.2 removed the spclocation column */
- (GET_MAJOR_VERSION(cluster->major_version) <= 901) ?
- "t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid) AS spclocation");
+ (GET_MAJOR_VERSION(cluster->major_version) <= 901) ?
+ "t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid) AS spclocation");
res = executeQueryOrDie(conn, "%s", query);
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index b851056135..90f1401549 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -142,7 +142,7 @@ parseCommandLine(int argc, char *argv[])
old_cluster.pgopts = pg_strdup(optarg);
else
{
- char *old_pgopts = old_cluster.pgopts;
+ char *old_pgopts = old_cluster.pgopts;
old_cluster.pgopts = psprintf("%s %s", old_pgopts, optarg);
free(old_pgopts);
@@ -155,7 +155,7 @@ parseCommandLine(int argc, char *argv[])
new_cluster.pgopts = pg_strdup(optarg);
else
{
- char *new_pgopts = new_cluster.pgopts;
+ char *new_pgopts = new_cluster.pgopts;
new_cluster.pgopts = psprintf("%s %s", new_pgopts, optarg);
free(new_pgopts);
@@ -249,13 +249,15 @@ parseCommandLine(int argc, char *argv[])
"PGDATANEW", "-D", "new cluster data resides");
#ifdef WIN32
+
/*
* On Windows, initdb --sync-only will fail with a "Permission denied"
- * error on file pg_upgrade_utility.log if pg_upgrade is run inside
- * the new cluster directory, so we do a check here.
+ * error on file pg_upgrade_utility.log if pg_upgrade is run inside the
+ * new cluster directory, so we do a check here.
*/
{
- char cwd[MAXPGPATH], new_cluster_pgdata[MAXPGPATH];
+ char cwd[MAXPGPATH],
+ new_cluster_pgdata[MAXPGPATH];
strlcpy(new_cluster_pgdata, new_cluster.pgdata, MAXPGPATH);
canonicalize_path(new_cluster_pgdata);
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 4e6a9f91be..8cdfaf35ef 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -333,8 +333,8 @@ create_new_objects(void)
check_ok();
/*
- * We don't have minmxids for databases or relations in pre-9.3
- * clusters, so set those after we have restores the schemas.
+ * We don't have minmxids for databases or relations in pre-9.3 clusters,
+ * so set those after we have restores the schemas.
*/
if (GET_MAJOR_VERSION(old_cluster.major_version) < 903)
set_frozenxids(true);
@@ -473,7 +473,7 @@ copy_clog_xlog_xid(void)
/* now reset the wal archives in the new cluster */
prep_status("Resetting WAL archives");
exec_prog(UTILITY_LOG_FILE, NULL, true,
- /* use timeline 1 to match controldata and no WAL history file */
+ /* use timeline 1 to match controldata and no WAL history file */
"\"%s/pg_resetxlog\" -l 00000001%s \"%s\"", new_cluster.bindir,
old_cluster.controldata.nextxlogfile + 8,
new_cluster.pgdata);
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index aecf0df30c..13aa891d59 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -329,7 +329,7 @@ extern OSInfo os_info;
/* check.c */
void output_check_banner(bool live_check);
-void check_and_dump_old_cluster(bool live_check);
+void check_and_dump_old_cluster(bool live_check);
void check_new_cluster(void);
void report_clusters_compatible(void);
void issue_warnings(void);
@@ -358,7 +358,7 @@ void optionally_create_toast_tables(void);
#define EXEC_PSQL_ARGS "--echo-queries --set ON_ERROR_STOP=on --no-psqlrc --dbname=template1"
-bool exec_prog(const char *log_file, const char *opt_log_file,
+bool exec_prog(const char *log_file, const char *opt_log_file,
bool throw_error, const char *fmt,...) pg_attribute_printf(4, 5);
void verify_directories(void);
bool pid_lock_file_exists(const char *datadir);
@@ -471,7 +471,7 @@ void pg_putenv(const char *var, const char *val);
void new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster,
bool check_mode);
-void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
+void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
/* parallel.c */
void parallel_exec_prog(const char *log_file, const char *opt_log_file,
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index 7b3215af56..c22df42949 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -35,10 +35,10 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying");
/*
- * Transferring files by tablespace is tricky because a single database can
- * use multiple tablespaces. For non-parallel mode, we just pass a NULL
- * tablespace path, which matches all tablespaces. In parallel mode, we
- * pass the default tablespace and all user-created tablespaces and let
+ * Transferring files by tablespace is tricky because a single database
+ * can use multiple tablespaces. For non-parallel mode, we just pass a
+ * NULL tablespace path, which matches all tablespaces. In parallel mode,
+ * we pass the default tablespace and all user-created tablespaces and let
* those operations happen in parallel.
*/
if (user_opts.jobs <= 1)
diff --git a/src/bin/pg_upgrade/server.c b/src/bin/pg_upgrade/server.c
index 8d8e7d7073..8c6b6da515 100644
--- a/src/bin/pg_upgrade/server.c
+++ b/src/bin/pg_upgrade/server.c
@@ -204,11 +204,12 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
/*
* Since PG 9.1, we have used -b to disable autovacuum. For earlier
* releases, setting autovacuum=off disables cleanup vacuum and analyze,
- * but freeze vacuums can still happen, so we set autovacuum_freeze_max_age
- * to its maximum. (autovacuum_multixact_freeze_max_age was introduced
- * after 9.1, so there is no need to set that.) We assume all datfrozenxid
- * and relfrozenxid values are less than a gap of 2000000000 from the current
- * xid counter, so autovacuum will not touch them.
+ * but freeze vacuums can still happen, so we set
+ * autovacuum_freeze_max_age to its maximum.
+ * (autovacuum_multixact_freeze_max_age was introduced after 9.1, so there
+ * is no need to set that.) We assume all datfrozenxid and relfrozenxid
+ * values are less than a gap of 2000000000 from the current xid counter,
+ * so autovacuum will not touch them.
*
* Turn off durability requirements to improve object creation speed, and
* we only modify the new cluster, so only use it there. If there is a
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index e3e7387c92..9954daea17 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -167,9 +167,9 @@ old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster)
{
pg_log(PG_REPORT, "fatal\n");
pg_fatal("Your installation contains the \"line\" data type in user tables. This\n"
- "data type changed its internal and input/output format between your old\n"
+ "data type changed its internal and input/output format between your old\n"
"and new clusters so this cluster cannot currently be upgraded. You can\n"
- "remove the problem tables and restart the upgrade. A list of the problem\n"
+ "remove the problem tables and restart the upgrade. A list of the problem\n"
"columns is in the file:\n"
" %s\n\n", output_path);
}
diff --git a/src/bin/pg_xlogdump/pg_xlogdump.c b/src/bin/pg_xlogdump/pg_xlogdump.c
index e9cbbd264d..c0a6816784 100644
--- a/src/bin/pg_xlogdump/pg_xlogdump.c
+++ b/src/bin/pg_xlogdump/pg_xlogdump.c
@@ -494,7 +494,10 @@ XLogDumpStatsRow(const char *name,
uint64 fpi_len, uint64 total_fpi_len,
uint64 tot_len, uint64 total_len)
{
- double n_pct, rec_len_pct, fpi_len_pct, tot_len_pct;
+ double n_pct,
+ rec_len_pct,
+ fpi_len_pct,
+ tot_len_pct;
n_pct = 0;
if (total_count != 0)
@@ -528,12 +531,14 @@ XLogDumpStatsRow(const char *name,
static void
XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
{
- int ri, rj;
+ int ri,
+ rj;
uint64 total_count = 0;
uint64 total_rec_len = 0;
uint64 total_fpi_len = 0;
uint64 total_len = 0;
- double rec_len_pct, fpi_len_pct;
+ double rec_len_pct,
+ fpi_len_pct;
/* ---
* Make a first pass to calculate column totals:
@@ -551,11 +556,11 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
total_rec_len += stats->rmgr_stats[ri].rec_len;
total_fpi_len += stats->rmgr_stats[ri].fpi_len;
}
- total_len = total_rec_len+total_fpi_len;
+ total_len = total_rec_len + total_fpi_len;
/*
- * 27 is strlen("Transaction/COMMIT_PREPARED"),
- * 20 is strlen(2^64), 8 is strlen("(100.00%)")
+ * 27 is strlen("Transaction/COMMIT_PREPARED"), 20 is strlen(2^64), 8 is
+ * strlen("(100.00%)")
*/
printf("%-27s %20s %8s %20s %8s %20s %8s %20s %8s\n"
@@ -565,7 +570,10 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
for (ri = 0; ri < RM_NEXT_ID; ri++)
{
- uint64 count, rec_len, fpi_len, tot_len;
+ uint64 count,
+ rec_len,
+ fpi_len,
+ tot_len;
const RmgrDescData *desc = &RmgrDescTable[ri];
if (!config->stats_per_record)
@@ -610,10 +618,10 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogDumpStats *stats)
"", "--------", "", "--------", "", "--------", "", "--------");
/*
- * The percentages in earlier rows were calculated against the
- * column total, but the ones that follow are against the row total.
- * Note that these are displayed with a % symbol to differentiate
- * them from the earlier ones, and are thus up to 9 characters long.
+ * The percentages in earlier rows were calculated against the column
+ * total, but the ones that follow are against the row total. Note that
+ * these are displayed with a % symbol to differentiate them from the
+ * earlier ones, and are thus up to 9 characters long.
*/
rec_len_pct = 0;
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 8b8b5911d6..6f35db4763 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -100,7 +100,7 @@ static int pthread_join(pthread_t th, void **thread_return);
#define LOG_STEP_SECONDS 5 /* seconds between log messages */
#define DEFAULT_NXACTS 10 /* default nxacts */
-#define MIN_GAUSSIAN_THRESHOLD 2.0 /* minimum threshold for gauss */
+#define MIN_GAUSSIAN_THRESHOLD 2.0 /* minimum threshold for gauss */
int nxacts = 0; /* number of transactions per client */
int duration = 0; /* duration in seconds */
@@ -244,7 +244,8 @@ typedef struct
int64 throttle_trigger; /* previous/next throttling (us) */
int64 throttle_lag; /* total transaction lag behind throttling */
int64 throttle_lag_max; /* max transaction lag */
- int64 throttle_latency_skipped; /* lagging transactions skipped */
+ int64 throttle_latency_skipped; /* lagging transactions
+ * skipped */
int64 latency_late; /* late transactions */
} TState;
@@ -296,8 +297,8 @@ typedef struct
long start_time; /* when does the interval start */
int cnt; /* number of transactions */
- int skipped; /* number of transactions skipped under
- * --rate and --latency-limit */
+ int skipped; /* number of transactions skipped under --rate
+ * and --latency-limit */
double min_latency; /* min/max latencies */
double max_latency;
@@ -389,7 +390,7 @@ usage(void)
" -f, --file=FILENAME read transaction script from FILENAME\n"
" -j, --jobs=NUM number of threads (default: 1)\n"
" -l, --log write transaction times to log file\n"
- " -L, --latency-limit=NUM count transactions lasting more than NUM ms\n"
+ " -L, --latency-limit=NUM count transactions lasting more than NUM ms\n"
" as late.\n"
" -M, --protocol=simple|extended|prepared\n"
" protocol for submitting queries (default: simple)\n"
@@ -509,19 +510,22 @@ getrand(TState *thread, int64 min, int64 max)
static int64
getExponentialRand(TState *thread, int64 min, int64 max, double threshold)
{
- double cut, uniform, rand;
+ double cut,
+ uniform,
+ rand;
+
Assert(threshold > 0.0);
cut = exp(-threshold);
/* erand in [0, 1), uniform in (0, 1] */
uniform = 1.0 - pg_erand48(thread->random_state);
+
/*
- * inner expresion in (cut, 1] (if threshold > 0),
- * rand in [0, 1)
+ * inner expresion in (cut, 1] (if threshold > 0), rand in [0, 1)
*/
Assert((1.0 - cut) != 0.0);
- rand = - log(cut + (1.0 - cut) * uniform) / threshold;
+ rand = -log(cut + (1.0 - cut) * uniform) / threshold;
/* return int64 random number within between min and max */
- return min + (int64)((max - min + 1) * rand);
+ return min + (int64) ((max - min + 1) * rand);
}
/* random number generator: gaussian distribution from min to max inclusive */
@@ -532,34 +536,37 @@ getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
double rand;
/*
- * Get user specified random number from this loop, with
- * -threshold < stdev <= threshold
+ * Get user specified random number from this loop, with -threshold <
+ * stdev <= threshold
*
* This loop is executed until the number is in the expected range.
*
* As the minimum threshold is 2.0, the probability of looping is low:
- * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the average
- * sinus multiplier as 2/pi, we have a 8.6% looping probability in the
- * worst case. For a 5.0 threshold value, the looping probability
- * is about e^{-5} * 2 / pi ~ 0.43%.
+ * sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the
+ * average sinus multiplier as 2/pi, we have a 8.6% looping probability in
+ * the worst case. For a 5.0 threshold value, the looping probability is
+ * about e^{-5} * 2 / pi ~ 0.43%.
*/
do
{
/*
* pg_erand48 generates [0,1), but for the basic version of the
* Box-Muller transform the two uniformly distributed random numbers
- * are expected in (0, 1] (see http://en.wikipedia.org/wiki/Box_muller)
+ * are expected in (0, 1] (see
+ * http://en.wikipedia.org/wiki/Box_muller)
*/
- double rand1 = 1.0 - pg_erand48(thread->random_state);
- double rand2 = 1.0 - pg_erand48(thread->random_state);
+ double rand1 = 1.0 - pg_erand48(thread->random_state);
+ double rand2 = 1.0 - pg_erand48(thread->random_state);
/* Box-Muller basic form transform */
- double var_sqrt = sqrt(-2.0 * log(rand1));
+ double var_sqrt = sqrt(-2.0 * log(rand1));
+
stdev = var_sqrt * sin(2.0 * M_PI * rand2);
/*
- * we may try with cos, but there may be a bias induced if the previous
- * value fails the test. To be on the safe side, let us try over.
+ * we may try with cos, but there may be a bias induced if the
+ * previous value fails the test. To be on the safe side, let us try
+ * over.
*/
}
while (stdev < -threshold || stdev >= threshold);
@@ -568,7 +575,7 @@ getGaussianRand(TState *thread, int64 min, int64 max, double threshold)
rand = (stdev + threshold) / (threshold * 2.0);
/* return int64 random number within between min and max */
- return min + (int64)((max - min + 1) * rand);
+ return min + (int64) ((max - min + 1) * rand);
}
/*
@@ -582,7 +589,7 @@ getPoissonRand(TState *thread, int64 center)
* Use inverse transform sampling to generate a value > 0, such that the
* expected (i.e. average) value is the given argument.
*/
- double uniform;
+ double uniform;
/* erand in [0, 1), uniform in (0, 1] */
uniform = 1.0 - pg_erand48(thread->random_state);
@@ -918,7 +925,7 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
if ((var = getVariable(st, expr->u.variable.varname)) == NULL)
{
fprintf(stderr, "undefined variable %s\n",
- expr->u.variable.varname);
+ expr->u.variable.varname);
return false;
}
*retval = strtoint64(var);
@@ -927,8 +934,8 @@ evaluateExpr(CState *st, PgBenchExpr *expr, int64 *retval)
case ENODE_OPERATOR:
{
- int64 lval;
- int64 rval;
+ int64 lval;
+ int64 rval;
if (!evaluateExpr(st, expr->u.operator.lexpr, &lval))
return false;
@@ -1115,7 +1122,7 @@ agg_vals_init(AggVals *aggs, instr_time start)
aggs->skipped = 0; /* xacts skipped under --rate --latency-limit */
aggs->sum_latency = 0; /* SUM(latency) */
- aggs->sum2_latency = 0; /* SUM(latency*latency) */
+ aggs->sum2_latency = 0; /* SUM(latency*latency) */
/* min and max transaction duration */
aggs->min_latency = 0;
@@ -1535,9 +1542,10 @@ top:
/*
* Generate random number functions need to be able to subtract
* max from min and add one to the result without overflowing.
- * Since we know max > min, we can detect overflow just by checking
- * for a negative result. But we must check both that the subtraction
- * doesn't overflow, and that adding one to the result doesn't overflow either.
+ * Since we know max > min, we can detect overflow just by
+ * checking for a negative result. But we must check both that the
+ * subtraction doesn't overflow, and that adding one to the result
+ * doesn't overflow either.
*/
if (max - min < 0 || (max - min) + 1 < 0)
{
@@ -1546,7 +1554,7 @@ top:
return true;
}
- if (argc == 4 || /* uniform without or with "uniform" keyword */
+ if (argc == 4 || /* uniform without or with "uniform" keyword */
(argc == 5 && pg_strcasecmp(argv[4], "uniform") == 0))
{
#ifdef DEBUG
@@ -1598,7 +1606,7 @@ top:
snprintf(res, sizeof(res), INT64_FORMAT, getExponentialRand(thread, min, max, threshold));
}
}
- else /* this means an error somewhere in the parsing phase... */
+ else /* this means an error somewhere in the parsing phase... */
{
fprintf(stderr, "%s: unexpected arguments\n", argv[0]);
st->ecnt++;
@@ -1742,7 +1750,10 @@ doLog(TState *thread, CState *st, FILE *logfile, instr_time *now, AggVals *agg,
agg->cnt += 1;
if (skipped)
{
- /* there is no latency to record if the transaction was skipped */
+ /*
+ * there is no latency to record if the transaction was
+ * skipped
+ */
agg->skipped += 1;
}
else
@@ -1779,9 +1790,9 @@ doLog(TState *thread, CState *st, FILE *logfile, instr_time *now, AggVals *agg,
while (agg->start_time + agg_interval < INSTR_TIME_GET_DOUBLE(*now))
{
/*
- * This is a non-Windows branch (thanks to the
- * ifdef in usage), so we don't need to handle
- * this in a special way (see below).
+ * This is a non-Windows branch (thanks to the ifdef in
+ * usage), so we don't need to handle this in a special way
+ * (see below).
*/
fprintf(logfile, "%ld %d %.0f %.0f %.0f %.0f",
agg->start_time,
@@ -2217,7 +2228,7 @@ syntax_error(const char *source, const int lineno,
fprintf(stderr, "%s\n", line);
if (column != -1)
{
- int i;
+ int i;
for (i = 0; i < column - 1; i++)
fprintf(stderr, " ");
@@ -2260,7 +2271,8 @@ process_commands(char *buf, const char *source, const int lineno)
if (*p == '\\')
{
- int max_args = -1;
+ int max_args = -1;
+
my_commands->type = META_COMMAND;
j = 0;
@@ -2282,9 +2294,9 @@ process_commands(char *buf, const char *source, const int lineno)
if (pg_strcasecmp(my_commands->argv[0], "setrandom") == 0)
{
- /* parsing:
- * \setrandom variable min max [uniform]
- * \setrandom variable min max (gaussian|exponential) threshold
+ /*
+ * parsing: \setrandom variable min max [uniform] \setrandom
+ * variable min max (gaussian|exponential) threshold
*/
if (my_commands->argc < 4)
@@ -2295,20 +2307,21 @@ process_commands(char *buf, const char *source, const int lineno)
/* argc >= 4 */
- if (my_commands->argc == 4 || /* uniform without/with "uniform" keyword */
+ if (my_commands->argc == 4 || /* uniform without/with
+ * "uniform" keyword */
(my_commands->argc == 5 &&
pg_strcasecmp(my_commands->argv[4], "uniform") == 0))
{
/* nothing to do */
}
- else if (/* argc >= 5 */
+ else if ( /* argc >= 5 */
(pg_strcasecmp(my_commands->argv[4], "gaussian") == 0) ||
- (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
+ (pg_strcasecmp(my_commands->argv[4], "exponential") == 0))
{
if (my_commands->argc < 6)
{
syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
- "missing threshold argument", my_commands->argv[4], -1);
+ "missing threshold argument", my_commands->argv[4], -1);
}
else if (my_commands->argc > 6)
{
@@ -2317,7 +2330,7 @@ process_commands(char *buf, const char *source, const int lineno)
my_commands->cols[6]);
}
}
- else /* cannot parse, unexpected arguments */
+ else /* cannot parse, unexpected arguments */
{
syntax_error(source, lineno, my_commands->line, my_commands->argv[0],
"unexpected argument", my_commands->argv[4],
@@ -2486,7 +2499,8 @@ process_file(char *filename)
Command **my_commands;
FILE *fd;
- int lineno, index;
+ int lineno,
+ index;
char *buf;
int alloc_num;
@@ -2514,6 +2528,7 @@ process_file(char *filename)
while ((buf = read_line_from_file(fd)) != NULL)
{
Command *command;
+
lineno += 1;
command = process_commands(buf, filename, lineno);
@@ -2547,7 +2562,8 @@ process_builtin(char *tb, const char *source)
#define COMMANDS_ALLOC_NUM 128
Command **my_commands;
- int lineno, index;
+ int lineno,
+ index;
char buf[BUFSIZ];
int alloc_num;
@@ -2653,7 +2669,7 @@ printResults(int ttype, int64 normal_xacts, int nclients,
if (latency_limit)
printf("number of transactions above the %.1f ms latency limit: " INT64_FORMAT " (%.3f %%)\n",
latency_limit / 1000.0, latency_late,
- 100.0 * latency_late / (throttle_latency_skipped + normal_xacts));
+ 100.0 * latency_late / (throttle_latency_skipped + normal_xacts));
if (throttle_delay || progress || latency_limit)
{
@@ -3045,7 +3061,8 @@ main(int argc, char **argv)
break;
case 'L':
{
- double limit_ms = atof(optarg);
+ double limit_ms = atof(optarg);
+
if (limit_ms <= 0.0)
{
fprintf(stderr, "invalid latency limit: %s\n", optarg);
diff --git a/src/bin/pgbench/pgbench.h b/src/bin/pgbench/pgbench.h
index a3db6b97cc..42e2aae294 100644
--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -22,39 +22,39 @@ typedef struct PgBenchExpr PgBenchExpr;
struct PgBenchExpr
{
- PgBenchExprType etype;
+ PgBenchExprType etype;
union
{
struct
{
- int64 ival;
- } integer_constant;
+ int64 ival;
+ } integer_constant;
struct
{
- char *varname;
- } variable;
+ char *varname;
+ } variable;
struct
{
- char operator;
- PgBenchExpr *lexpr;
+ char operator;
+ PgBenchExpr *lexpr;
PgBenchExpr *rexpr;
- } operator;
- } u;
+ } operator;
+ } u;
};
extern PgBenchExpr *expr_parse_result;
-extern int expr_yyparse(void);
-extern int expr_yylex(void);
+extern int expr_yyparse(void);
+extern int expr_yylex(void);
extern void expr_yyerror(const char *str);
extern void expr_scanner_init(const char *str, const char *source,
- const int lineno, const char *line,
- const char *cmd, const int ecol);
-extern void syntax_error(const char* source, const int lineno, const char* line,
- const char* cmd, const char* msg, const char* more,
- const int col);
+ const int lineno, const char *line,
+ const char *cmd, const int ecol);
+extern void syntax_error(const char *source, const int lineno, const char *line,
+ const char *cmd, const char *msg, const char *more,
+ const int col);
extern void expr_scanner_finish(void);
extern int64 strtoint64(const char *str);
-#endif /* PGBENCH_H */
+#endif /* PGBENCH_H */
diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c
index 70b7d3be15..38253fa098 100644
--- a/src/bin/psql/command.c
+++ b/src/bin/psql/command.c
@@ -1082,7 +1082,8 @@ exec_command(const char *cmd,
for (i = 0; my_list[i] != NULL; i++)
{
- char *val = pset_value_string(my_list[i], &pset.popt);
+ char *val = pset_value_string(my_list[i], &pset.popt);
+
printf("%-24s %s\n", my_list[i], val);
free(val);
}
@@ -1515,7 +1516,7 @@ exec_command(const char *cmd,
else if (strcmp(cmd, "?") == 0)
{
char *opt0 = psql_scan_slash_option(scan_state,
- OT_NORMAL, NULL, false);
+ OT_NORMAL, NULL, false);
if (!opt0 || strcmp(opt0, "commands") == 0)
slashUsage(pset.popt.topt.pager);
@@ -1636,8 +1637,8 @@ do_connect(char *dbname, char *user, char *host, char *port)
/*
* Any change in the parameters read above makes us discard the password.
- * We also discard it if we're to use a conninfo rather than the positional
- * syntax.
+ * We also discard it if we're to use a conninfo rather than the
+ * positional syntax.
*/
keep_password =
((strcmp(user, PQuser(o_conn)) == 0) &&
@@ -1863,7 +1864,7 @@ printSSLInfo(void)
protocol ? protocol : _("unknown"),
cipher ? cipher : _("unknown"),
bits ? bits : _("unknown"),
- (compression && strcmp(compression, "off") != 0) ? _("on") : _("off"));
+ (compression && strcmp(compression, "off") != 0) ? _("on") : _("off"));
}
@@ -2402,7 +2403,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
if (!value)
;
else if (!set_unicode_line_style(popt, value, vallen,
- &popt->topt.unicode_border_linestyle))
+ &popt->topt.unicode_border_linestyle))
{
psql_error("\\pset: allowed unicode border linestyle are single, double\n");
return false;
@@ -2415,7 +2416,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
if (!value)
;
else if (!set_unicode_line_style(popt, value, vallen,
- &popt->topt.unicode_column_linestyle))
+ &popt->topt.unicode_column_linestyle))
{
psql_error("\\pset: allowed unicode column linestyle are single, double\n");
return false;
@@ -2428,7 +2429,7 @@ do_pset(const char *param, const char *value, printQueryOpt *popt, bool quiet)
if (!value)
;
else if (!set_unicode_line_style(popt, value, vallen,
- &popt->topt.unicode_header_linestyle))
+ &popt->topt.unicode_header_linestyle))
{
psql_error("\\pset: allowed unicode header linestyle are single, double\n");
return false;
@@ -2742,19 +2743,19 @@ printPsetInfo(const char *param, struct printQueryOpt *popt)
else if (strcmp(param, "unicode_border_linestyle") == 0)
{
printf(_("Unicode border linestyle is \"%s\".\n"),
- _unicode_linestyle2string(popt->topt.unicode_border_linestyle));
+ _unicode_linestyle2string(popt->topt.unicode_border_linestyle));
}
else if (strcmp(param, "unicode_column_linestyle") == 0)
{
printf(_("Unicode column linestyle is \"%s\".\n"),
- _unicode_linestyle2string(popt->topt.unicode_column_linestyle));
+ _unicode_linestyle2string(popt->topt.unicode_column_linestyle));
}
else if (strcmp(param, "unicode_header_linestyle") == 0)
{
printf(_("Unicode border linestyle is \"%s\".\n"),
- _unicode_linestyle2string(popt->topt.unicode_header_linestyle));
+ _unicode_linestyle2string(popt->topt.unicode_header_linestyle));
}
else
@@ -2945,7 +2946,7 @@ do_watch(PQExpBuffer query_buf, long sleep)
for (;;)
{
- int res;
+ int res;
time_t timer;
long i;
@@ -2962,8 +2963,8 @@ do_watch(PQExpBuffer query_buf, long sleep)
res = PSQLexecWatch(query_buf->data, &myopt);
/*
- * PSQLexecWatch handles the case where we can no longer
- * repeat the query, and returns 0 or -1.
+ * PSQLexecWatch handles the case where we can no longer repeat the
+ * query, and returns 0 or -1.
*/
if (res == 0)
break;
@@ -3001,7 +3002,7 @@ do_watch(PQExpBuffer query_buf, long sleep)
* returns true unless we have ECHO_HIDDEN_NOEXEC.
*/
static bool
-lookup_function_echo_hidden(char * query)
+lookup_function_echo_hidden(char *query)
{
if (pset.echo_hidden != PSQL_ECHO_HIDDEN_OFF)
{
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index ff01368531..0e266a3e18 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -491,7 +491,7 @@ int
PSQLexecWatch(const char *query, const printQueryOpt *opt)
{
PGresult *res;
- double elapsed_msec = 0;
+ double elapsed_msec = 0;
instr_time before;
instr_time after;
@@ -524,10 +524,9 @@ PSQLexecWatch(const char *query, const printQueryOpt *opt)
}
/*
- * If SIGINT is sent while the query is processing, the interrupt
- * will be consumed. The user's intention, though, is to cancel
- * the entire watch process, so detect a sent cancellation request and
- * exit in this case.
+ * If SIGINT is sent while the query is processing, the interrupt will be
+ * consumed. The user's intention, though, is to cancel the entire watch
+ * process, so detect a sent cancellation request and exit in this case.
*/
if (cancel_pressed)
{
diff --git a/src/bin/psql/common.h b/src/bin/psql/common.h
index 3c3ffa3f14..caf31d19b8 100644
--- a/src/bin/psql/common.h
+++ b/src/bin/psql/common.h
@@ -36,7 +36,7 @@ extern void SetCancelConn(void);
extern void ResetCancelConn(void);
extern PGresult *PSQLexec(const char *query);
-extern int PSQLexecWatch(const char *query, const printQueryOpt *opt);
+extern int PSQLexecWatch(const char *query, const printQueryOpt *opt);
extern bool SendQuery(const char *query);
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index 965a1dcb26..f1eb518de7 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -556,6 +556,7 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
if (showprompt)
{
const char *prompt = get_prompt(PROMPT_COPY);
+
fputs(prompt, stdout);
fflush(stdout);
}
@@ -593,6 +594,7 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
if (showprompt)
{
const char *prompt = get_prompt(PROMPT_COPY);
+
fputs(prompt, stdout);
fflush(stdout);
}
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 04d769e3d6..db568096dc 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -531,7 +531,7 @@ describeTypes(const char *pattern, bool verbose, bool showSystem)
if (verbose)
{
appendPQExpBuffer(&buf,
- " pg_catalog.pg_get_userbyid(t.typowner) AS \"%s\",\n",
+ " pg_catalog.pg_get_userbyid(t.typowner) AS \"%s\",\n",
gettext_noop("Owner"));
}
if (verbose && pset.sversion >= 90200)
@@ -803,7 +803,7 @@ permissionsList(const char *pattern)
" ELSE E''\n"
" END"
" || CASE WHEN polroles <> '{0}' THEN\n"
- " E'\\n to: ' || pg_catalog.array_to_string(\n"
+ " E'\\n to: ' || pg_catalog.array_to_string(\n"
" ARRAY(\n"
" SELECT rolname\n"
" FROM pg_catalog.pg_roles\n"
@@ -2031,19 +2031,19 @@ describeOneTableDetails(const char *schemaname,
if (pset.sversion >= 90500)
{
printfPQExpBuffer(&buf,
- "SELECT pol.polname,\n"
- "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE array_to_string(array(select rolname from pg_roles where oid = any (pol.polroles) order by 1),',') END,\n"
- "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid),\n"
- "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid),\n"
- "CASE pol.polcmd \n"
- "WHEN 'r' THEN 'SELECT'\n"
- "WHEN 'a' THEN 'INSERT'\n"
- "WHEN 'w' THEN 'UPDATE'\n"
- "WHEN 'd' THEN 'DELETE'\n"
- "WHEN '*' THEN 'ALL'\n"
- "END AS cmd\n"
+ "SELECT pol.polname,\n"
+ "CASE WHEN pol.polroles = '{0}' THEN NULL ELSE array_to_string(array(select rolname from pg_roles where oid = any (pol.polroles) order by 1),',') END,\n"
+ "pg_catalog.pg_get_expr(pol.polqual, pol.polrelid),\n"
+ "pg_catalog.pg_get_expr(pol.polwithcheck, pol.polrelid),\n"
+ "CASE pol.polcmd \n"
+ "WHEN 'r' THEN 'SELECT'\n"
+ "WHEN 'a' THEN 'INSERT'\n"
+ "WHEN 'w' THEN 'UPDATE'\n"
+ "WHEN 'd' THEN 'DELETE'\n"
+ "WHEN '*' THEN 'ALL'\n"
+ "END AS cmd\n"
"FROM pg_catalog.pg_policy pol\n"
- "WHERE pol.polrelid = '%s' ORDER BY 1;",
+ "WHERE pol.polrelid = '%s' ORDER BY 1;",
oid);
result = PSQLexec(buf.data);
@@ -2053,9 +2053,9 @@ describeOneTableDetails(const char *schemaname,
tuples = PQntuples(result);
/*
- * Handle cases where RLS is enabled and there are policies,
- * or there aren't policies, or RLS isn't enabled but there
- * are policies
+ * Handle cases where RLS is enabled and there are policies, or
+ * there aren't policies, or RLS isn't enabled but there are
+ * policies
*/
if (tableinfo.rowsecurity && tuples > 0)
printTableAddFooter(&cont, _("Policies:"));
@@ -2070,7 +2070,7 @@ describeOneTableDetails(const char *schemaname,
for (i = 0; i < tuples; i++)
{
printfPQExpBuffer(&buf, " POLICY \"%s\"",
- PQgetvalue(result, i, 0));
+ PQgetvalue(result, i, 0));
if (!PQgetisnull(result, i, 4))
appendPQExpBuffer(&buf, " FOR %s",
diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c
index ea05c3e37b..b523054825 100644
--- a/src/bin/psql/help.c
+++ b/src/bin/psql/help.c
@@ -81,11 +81,11 @@ usage(unsigned short int pager)
fprintf(output, _(" -f, --file=FILENAME execute commands from file, then exit\n"));
fprintf(output, _(" -l, --list list available databases, then exit\n"));
fprintf(output, _(" -v, --set=, --variable=NAME=VALUE\n"
- " set psql variable NAME to VALUE e.g.: -v ON_ERROR_STOP=1\n"));
+ " set psql variable NAME to VALUE e.g.: -v ON_ERROR_STOP=1\n"));
fprintf(output, _(" -V, --version output version information, then exit\n"));
fprintf(output, _(" -X, --no-psqlrc do not read startup file (~/.psqlrc)\n"));
fprintf(output, _(" -1 (\"one\"), --single-transaction\n"
- " execute as a single transaction (if non-interactive)\n"));
+ " execute as a single transaction (if non-interactive)\n"));
fprintf(output, _(" -?, --help[=options] show this help, then exit\n"));
fprintf(output, _(" --help=variables show a list of all specially treated variables, then exit\n"));
fprintf(output, _(" --help=commands show a list of backslash commands, then exit\n"));
@@ -105,29 +105,29 @@ usage(unsigned short int pager)
fprintf(output, _("\nOutput format options:\n"));
fprintf(output, _(" -A, --no-align unaligned table output mode\n"));
fprintf(output, _(" -F, --field-separator=STRING\n"
- " field separator for unaligned output (default: \"%s\")\n"),
- DEFAULT_FIELD_SEP);
+ " field separator for unaligned output (default: \"%s\")\n"),
+ DEFAULT_FIELD_SEP);
fprintf(output, _(" -H, --html HTML table output mode\n"));
fprintf(output, _(" -P, --pset=VAR[=ARG] set printing option VAR to ARG (see \\pset command)\n"));
fprintf(output, _(" -R, --record-separator=STRING\n"
- " record separator for unaligned output (default: newline)\n"));
+ " record separator for unaligned output (default: newline)\n"));
fprintf(output, _(" -t, --tuples-only print rows only\n"));
fprintf(output, _(" -T, --table-attr=TEXT set HTML table tag attributes (e.g., width, border)\n"));
fprintf(output, _(" -x, --expanded turn on expanded table output\n"));
fprintf(output, _(" -z, --field-separator-zero\n"
- " set field separator for unaligned output to zero byte\n"));
+ " set field separator for unaligned output to zero byte\n"));
fprintf(output, _(" -0, --record-separator-zero\n"
- " set record separator for unaligned output to zero byte\n"));
+ " set record separator for unaligned output to zero byte\n"));
fprintf(output, _("\nConnection options:\n"));
/* Display default host */
env = getenv("PGHOST");
fprintf(output, _(" -h, --host=HOSTNAME database server host or socket directory (default: \"%s\")\n"),
- env ? env : _("local socket"));
+ env ? env : _("local socket"));
/* Display default port */
env = getenv("PGPORT");
fprintf(output, _(" -p, --port=PORT database server port (default: \"%s\")\n"),
- env ? env : DEF_PGPORT_STR);
+ env ? env : DEF_PGPORT_STR);
/* Display default user */
env = getenv("PGUSER");
if (!env)
@@ -137,8 +137,8 @@ usage(unsigned short int pager)
fprintf(output, _(" -W, --password force password prompt (should happen automatically)\n"));
fprintf(output, _("\nFor more information, type \"\\?\" (for internal commands) or \"\\help\" (for SQL\n"
- "commands) from within psql, or consult the psql section in the PostgreSQL\n"
- "documentation.\n\n"));
+ "commands) from within psql, or consult the psql section in the PostgreSQL\n"
+ "documentation.\n\n"));
fprintf(output, _("Report bugs to .\n"));
ClosePager(output);
@@ -315,15 +315,15 @@ helpVariables(unsigned short int pager)
fprintf(output, _(" AUTOCOMMIT if set, successful SQL commands are automatically committed\n"));
fprintf(output, _(" COMP_KEYWORD_CASE determine the case used to complete SQL keywords\n"
- " [lower, upper, preserve-lower, preserve-upper]\n"));
+ " [lower, upper, preserve-lower, preserve-upper]\n"));
fprintf(output, _(" DBNAME the currently connected database name\n"));
fprintf(output, _(" ECHO control what input is written to standard output\n"
- " [all, errors, none, queries]\n"));
+ " [all, errors, none, queries]\n"));
fprintf(output, _(" ECHO_HIDDEN display internal queries executed by backslash commands when it is set\n"
- " or with [noexec] just show without execution\n"));
+ " or with [noexec] just show without execution\n"));
fprintf(output, _(" ENCODING current client character set encoding\n"));
fprintf(output, _(" FETCH_COUNT the number of result rows to fetch and display at a time\n"
- " (default: 0=unlimited)\n"));
+ " (default: 0=unlimited)\n"));
fprintf(output, _(" HISTCONTROL control history list [ignorespace, ignoredups, ignoreboth]\n"));
fprintf(output, _(" HISTFILE file name used to store the history list\n"));
fprintf(output, _(" HISTSIZE the number of commands to store in the command history\n"));
@@ -356,18 +356,18 @@ helpVariables(unsigned short int pager)
fprintf(output, _(" linestyle set the border line drawing style [ascii, old-ascii, unicode]\n"));
fprintf(output, _(" null set the string to be printed in place of a null value\n"));
fprintf(output, _(" numericlocale enable or disable display of a locale-specific character to separate\n"
- " groups of digits [on, off]\n"));
+ " groups of digits [on, off]\n"));
fprintf(output, _(" pager control when an external pager is used [yes, no, always]\n"));
fprintf(output, _(" recordsep specify the record (line) separator to use in unaligned output format\n"));
fprintf(output, _(" recordsep_zero set the record separator to use in unaligned output format to a zero byte.\n"));
fprintf(output, _(" tableattr (or T) specify attributes for table tag in html format or proportional\n"
- " column width of left aligned data type in latex format\n"));
+ " column width of left aligned data type in latex format\n"));
fprintf(output, _(" title set the table title for any subsequently printed tables\n"));
fprintf(output, _(" tuples_only if set, only actual table data is shown\n"));
fprintf(output, _(" unicode_border_linestyle\n"));
fprintf(output, _(" unicode_column_linestyle\n"));
fprintf(output, _(" unicode_header_linestyle\n"
- " set the style of unicode line drawing [single, double]\n"));
+ " set the style of unicode line drawing [single, double]\n"));
fprintf(output, _("\nEnvironment variables:\n"));
fprintf(output, _("Usage:\n"));
@@ -388,9 +388,9 @@ helpVariables(unsigned short int pager)
fprintf(output, _(" PGPASSWORD connection password (not recommended)\n"));
fprintf(output, _(" PGPASSFILE password file name\n"));
fprintf(output, _(" PSQL_EDITOR, EDITOR, VISUAL\n"
- " editor used by the \\e and \\ef commands\n"));
+ " editor used by the \\e and \\ef commands\n"));
fprintf(output, _(" PSQL_EDITOR_LINENUMBER_ARG\n"
- " how to specify a line number when invoking the editor\n"));
+ " how to specify a line number when invoking the editor\n"));
fprintf(output, _(" PSQL_HISTORY alternative location for the command history file\n"));
fprintf(output, _(" PSQLRC alternative location for the user's .psqlrc file\n"));
fprintf(output, _(" SHELL shell used by the \\! command\n"));
diff --git a/src/bin/psql/print.c b/src/bin/psql/print.c
index 94c69845c7..cab9e6eb44 100644
--- a/src/bin/psql/print.c
+++ b/src/bin/psql/print.c
@@ -92,20 +92,23 @@ const printTextFormat pg_asciiformat_old =
/* Default unicode linestyle format */
const printTextFormat pg_utf8format;
-typedef struct unicodeStyleRowFormat {
+typedef struct unicodeStyleRowFormat
+{
const char *horizontal;
const char *vertical_and_right[2];
const char *vertical_and_left[2];
} unicodeStyleRowFormat;
-typedef struct unicodeStyleColumnFormat {
+typedef struct unicodeStyleColumnFormat
+{
const char *vertical;
const char *vertical_and_horizontal[2];
const char *up_and_horizontal[2];
const char *down_and_horizontal[2];
} unicodeStyleColumnFormat;
-typedef struct unicodeStyleBorderFormat {
+typedef struct unicodeStyleBorderFormat
+{
const char *up_and_right;
const char *vertical;
const char *down_and_right;
@@ -114,7 +117,8 @@ typedef struct unicodeStyleBorderFormat {
const char *left_and_right;
} unicodeStyleBorderFormat;
-typedef struct unicodeStyleFormat {
+typedef struct unicodeStyleFormat
+{
unicodeStyleRowFormat row_style[2];
unicodeStyleColumnFormat column_style[2];
unicodeStyleBorderFormat border_style[2];
@@ -124,7 +128,7 @@ typedef struct unicodeStyleFormat {
const char *nl_right;
const char *wrap_left;
const char *wrap_right;
- bool wrap_right_border;
+ bool wrap_right_border;
} unicodeStyleFormat;
const unicodeStyleFormat unicode_style = {
@@ -175,11 +179,11 @@ const unicodeStyleFormat unicode_style = {
{"\342\225\232", "\342\225\221", "\342\225\224", "\342\225\220", "\342\225\227", "\342\225\235"},
},
" ",
- "\342\206\265", /* ↵ */
+ "\342\206\265", /* ↵ */
" ",
- "\342\206\265", /* ↵ */
- "\342\200\246", /* … */
- "\342\200\246", /* … */
+ "\342\206\265", /* ↵ */
+ "\342\200\246", /* … */
+ "\342\200\246", /* … */
true
};
@@ -984,7 +988,7 @@ print_aligned_text(const printTableContent *cont, FILE *fout)
int bytes_to_output;
int chars_to_output = width_wrap[j];
bool finalspaces = (opt_border == 2 ||
- (col_count > 0 && j < col_count - 1));
+ (col_count > 0 && j < col_count - 1));
/* Print left-hand wrap or newline mark */
if (opt_border != 0)
@@ -1356,12 +1360,13 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
else if (opt_border == 1)
{
/*
- * For border = 1, one for the pipe (|) in the middle
- * between the two spaces.
+ * For border = 1, one for the pipe (|) in the middle between the
+ * two spaces.
*/
swidth = 3;
}
else
+
/*
* For border = 2, two more for the pipes (|) at the beginning and
* at the end of the lines.
@@ -1370,10 +1375,10 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
if ((opt_border < 2) &&
((hmultiline &&
- (format == &pg_asciiformat_old)) ||
- (dmultiline &&
- (format != &pg_asciiformat_old))))
- iwidth++; /* for newline indicators */
+ (format == &pg_asciiformat_old)) ||
+ (dmultiline &&
+ (format != &pg_asciiformat_old))))
+ iwidth++; /* for newline indicators */
min_width = hwidth + iwidth + swidth + 3;
@@ -1386,6 +1391,7 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
* Record number
*/
unsigned int rwidth = 1 + log10(cont->nrows);
+
if (opt_border == 0)
rwidth += 9; /* "* RECORD " */
else if (opt_border == 1)
@@ -1402,6 +1408,7 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
if ((width < min_width) || (output_columns < min_width))
width = min_width - hwidth - iwidth - swidth;
else if (output_columns > 0)
+
/*
* Wrap to maximum width
*/
@@ -1412,7 +1419,7 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
dmultiline = true;
if ((opt_border == 0) &&
(format != &pg_asciiformat_old))
- width--; /* for wrap indicators */
+ width--; /* for wrap indicators */
}
dwidth = width;
}
@@ -1440,10 +1447,11 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
if (i % cont->ncolumns == 0)
{
unsigned int lhwidth = hwidth;
+
if ((opt_border < 2) &&
(hmultiline) &&
(format == &pg_asciiformat_old))
- lhwidth++; /* for newline indicators */
+ lhwidth++; /* for newline indicators */
if (!opt_tuples_only)
print_aligned_vertical_line(format, opt_border, record++,
@@ -1480,12 +1488,14 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
{
int swidth = hwidth,
target_width = hwidth;
+
/*
* Left spacer or new line indicator
*/
if ((opt_border == 2) ||
(hmultiline && (format == &pg_asciiformat_old)))
fputs(hline ? format->header_nl_left : " ", fout);
+
/*
* Header text
*/
@@ -1523,6 +1533,7 @@ print_aligned_vertical(const printTableContent *cont, FILE *fout)
else
{
unsigned int swidth = hwidth + opt_border;
+
if ((opt_border < 2) &&
(hmultiline) &&
(format == &pg_asciiformat_old))
@@ -1886,9 +1897,10 @@ static void
asciidoc_escaped_print(const char *in, FILE *fout)
{
const char *p;
+
for (p = in; *p; p++)
{
- switch(*p)
+ switch (*p)
{
case '|':
fputs("\\|", fout);
@@ -1925,7 +1937,7 @@ print_asciidoc_text(const printTableContent *cont, FILE *fout)
/* print table [] header definition */
fprintf(fout, "[%scols=\"", !opt_tuples_only ? "options=\"header\"," : "");
- for(i = 0; i < cont->ncolumns; i++)
+ for (i = 0; i < cont->ncolumns; i++)
{
if (i != 0)
fputs(",", fout);
@@ -2046,7 +2058,7 @@ print_asciidoc_vertical(const printTableContent *cont, FILE *fout)
break;
case 2:
fputs(",frame=\"all\",grid=\"all\"", fout);
- break;
+ break;
}
fputs("]\n", fout);
fputs("|====\n", fout);
@@ -2729,8 +2741,8 @@ PageOutput(int lines, const printTableOpt *topt)
{
const char *pagerprog;
FILE *pagerpipe;
- unsigned short int pager = topt->pager;
- int min_lines = topt->pager_min_lines;
+ unsigned short int pager = topt->pager;
+ int min_lines = topt->pager_min_lines;
#ifdef TIOCGWINSZ
int result;
@@ -3262,7 +3274,7 @@ get_line_style(const printTableOpt *opt)
void
refresh_utf8format(const printTableOpt *opt)
{
- printTextFormat *popt = (printTextFormat *) &pg_utf8format;
+ printTextFormat *popt = (printTextFormat *) &pg_utf8format;
const unicodeStyleBorderFormat *border;
const unicodeStyleRowFormat *header;
diff --git a/src/bin/psql/print.h b/src/bin/psql/print.h
index 322db4d6ef..b0b6bf5251 100644
--- a/src/bin/psql/print.h
+++ b/src/bin/psql/print.h
@@ -90,7 +90,7 @@ typedef struct printTableOpt
* 1=dividing lines, 2=full */
unsigned short int pager; /* use pager for output (if to stdout and
* stdout is a tty) 0=off 1=on 2=always */
- int pager_min_lines;/* don't use pager unless there are at least
+ int pager_min_lines;/* don't use pager unless there are at least
* this many lines */
bool tuples_only; /* don't output headers, row counts, etc. */
bool start_table; /* print start decoration, eg