Skip to content

Commit b663a41

Browse files
committed
Implement support for bulk inserts in postgres_fdw
Extends the FDW API to allow batching inserts into foreign tables. That is usually much more efficient than inserting individual rows, due to the high latency of each round-trip to the foreign server. It was possible to implement something similar in the regular FDW API, but it was inconvenient and there were issues with reporting the number of actually inserted rows etc. This extends the FDW API with two new functions: * GetForeignModifyBatchSize - allows the FDW to pick the optimal batch size * ExecForeignBatchInsert - inserts a batch of rows at once Currently, only INSERT queries support batching. Support for DELETE and UPDATE may be added in the future. This also implements batching for postgres_fdw. The batch size may be specified using the "batch_size" option at both the server and table level. The initial patch version was written by me, but it was rewritten and improved in many ways by Takayuki Tsunakawa. Author: Takayuki Tsunakawa Reviewed-by: Tomas Vondra, Amit Langote Discussion: https://postgr.es/m/20200628151002.7x5laxwpgvkyiu3q@development
1 parent ad600bb commit b663a41

File tree

14 files changed

+903
-72
lines changed

14 files changed

+903
-72
lines changed

contrib/postgres_fdw/deparse.c

+53-1
Original file line numberDiff line numberDiff line change
@@ -1705,13 +1705,16 @@ deparseRangeTblRef(StringInfo buf, PlannerInfo *root, RelOptInfo *foreignrel,
17051705
* The statement text is appended to buf, and we also create an integer List
17061706
* of the columns being retrieved by WITH CHECK OPTION or RETURNING (if any),
17071707
* which is returned to *retrieved_attrs.
1708+
*
1709+
* This also stores end position of the VALUES clause, so that we can rebuild
1710+
* an INSERT for a batch of rows later.
17081711
*/
17091712
void
17101713
deparseInsertSql(StringInfo buf, RangeTblEntry *rte,
17111714
Index rtindex, Relation rel,
17121715
List *targetAttrs, bool doNothing,
17131716
List *withCheckOptionList, List *returningList,
1714-
List **retrieved_attrs)
1717+
List **retrieved_attrs, int *values_end_len)
17151718
{
17161719
AttrNumber pindex;
17171720
bool first;
@@ -1754,6 +1757,7 @@ deparseInsertSql(StringInfo buf, RangeTblEntry *rte,
17541757
}
17551758
else
17561759
appendStringInfoString(buf, " DEFAULT VALUES");
1760+
*values_end_len = buf->len;
17571761

17581762
if (doNothing)
17591763
appendStringInfoString(buf, " ON CONFLICT DO NOTHING");
@@ -1763,6 +1767,54 @@ deparseInsertSql(StringInfo buf, RangeTblEntry *rte,
17631767
withCheckOptionList, returningList, retrieved_attrs);
17641768
}
17651769

1770+
/*
1771+
* rebuild remote INSERT statement
1772+
*
1773+
* Provided a number of rows in a batch, builds INSERT statement with the
1774+
* right number of parameters.
1775+
*/
1776+
void
1777+
rebuildInsertSql(StringInfo buf, char *orig_query,
1778+
int values_end_len, int num_cols,
1779+
int num_rows)
1780+
{
1781+
int i, j;
1782+
int pindex;
1783+
bool first;
1784+
1785+
/* Make sure the values_end_len is sensible */
1786+
Assert((values_end_len > 0) && (values_end_len <= strlen(orig_query)));
1787+
1788+
/* Copy up to the end of the first record from the original query */
1789+
appendBinaryStringInfo(buf, orig_query, values_end_len);
1790+
1791+
/*
1792+
* Add records to VALUES clause (we already have parameters for the
1793+
* first row, so start at the right offset).
1794+
*/
1795+
pindex = num_cols + 1;
1796+
for (i = 0; i < num_rows; i++)
1797+
{
1798+
appendStringInfoString(buf, ", (");
1799+
1800+
first = true;
1801+
for (j = 0; j < num_cols; j++)
1802+
{
1803+
if (!first)
1804+
appendStringInfoString(buf, ", ");
1805+
first = false;
1806+
1807+
appendStringInfo(buf, "$%d", pindex);
1808+
pindex++;
1809+
}
1810+
1811+
appendStringInfoChar(buf, ')');
1812+
}
1813+
1814+
/* Copy stuff after VALUES clause from the original query */
1815+
appendStringInfoString(buf, orig_query + values_end_len);
1816+
}
1817+
17661818
/*
17671819
* deparse remote UPDATE statement
17681820
*

contrib/postgres_fdw/expected/postgres_fdw.out

+148-7
Original file line numberDiff line numberDiff line change
@@ -3887,9 +3887,10 @@ EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st7;
38873887
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
38883888
Insert on public.ft1
38893889
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
3890+
Batch Size: 1
38903891
-> Result
38913892
Output: NULL::integer, 1001, 101, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft1 '::character(10), NULL::user_enum
3892-
(4 rows)
3893+
(5 rows)
38933894

38943895
ALTER TABLE "S 1"."T 1" RENAME TO "T 0";
38953896
ALTER FOREIGN TABLE ft1 OPTIONS (SET table_name 'T 0');
@@ -3920,9 +3921,10 @@ EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st7;
39203921
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
39213922
Insert on public.ft1
39223923
Remote SQL: INSERT INTO "S 1"."T 0"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
3924+
Batch Size: 1
39233925
-> Result
39243926
Output: NULL::integer, 1001, 101, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft1 '::character(10), NULL::user_enum
3925-
(4 rows)
3927+
(5 rows)
39263928

39273929
ALTER TABLE "S 1"."T 0" RENAME TO "T 1";
39283930
ALTER FOREIGN TABLE ft1 OPTIONS (SET table_name 'T 1');
@@ -4244,12 +4246,13 @@ INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20;
42444246
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
42454247
Insert on public.ft2
42464248
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
4249+
Batch Size: 1
42474250
-> Subquery Scan on "*SELECT*"
42484251
Output: "*SELECT*"."?column?", "*SELECT*"."?column?_1", NULL::integer, "*SELECT*"."?column?_2", NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft2 '::character(10), NULL::user_enum
42494252
-> Foreign Scan on public.ft2 ft2_1
42504253
Output: (ft2_1.c1 + 1000), (ft2_1.c2 + 100), (ft2_1.c3 || ft2_1.c3)
42514254
Remote SQL: SELECT "C 1", c2, c3 FROM "S 1"."T 1" LIMIT 20::bigint
4252-
(7 rows)
4255+
(8 rows)
42534256

42544257
INSERT INTO ft2 (c1,c2,c3) SELECT c1+1000,c2+100, c3 || c3 FROM ft2 LIMIT 20;
42554258
INSERT INTO ft2 (c1,c2,c3)
@@ -5360,9 +5363,10 @@ INSERT INTO ft2 (c1,c2,c3) VALUES (1200,999,'foo') RETURNING tableoid::regclass;
53605363
Insert on public.ft2
53615364
Output: (ft2.tableoid)::regclass
53625365
Remote SQL: INSERT INTO "S 1"."T 1"("C 1", c2, c3, c4, c5, c6, c7, c8) VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
5366+
Batch Size: 1
53635367
-> Result
53645368
Output: 1200, 999, NULL::integer, 'foo'::text, NULL::timestamp with time zone, NULL::timestamp without time zone, NULL::character varying, 'ft2 '::character(10), NULL::user_enum
5365-
(5 rows)
5369+
(6 rows)
53665370

53675371
INSERT INTO ft2 (c1,c2,c3) VALUES (1200,999,'foo') RETURNING tableoid::regclass;
53685372
tableoid
@@ -6212,9 +6216,10 @@ INSERT INTO rw_view VALUES (0, 5);
62126216
--------------------------------------------------------------------------------
62136217
Insert on public.foreign_tbl
62146218
Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
6219+
Batch Size: 1
62156220
-> Result
62166221
Output: 0, 5
6217-
(4 rows)
6222+
(5 rows)
62186223

62196224
INSERT INTO rw_view VALUES (0, 5); -- should fail
62206225
ERROR: new row violates check option for view "rw_view"
@@ -6225,9 +6230,10 @@ INSERT INTO rw_view VALUES (0, 15);
62256230
--------------------------------------------------------------------------------
62266231
Insert on public.foreign_tbl
62276232
Remote SQL: INSERT INTO public.base_tbl(a, b) VALUES ($1, $2) RETURNING a, b
6233+
Batch Size: 1
62286234
-> Result
62296235
Output: 0, 15
6230-
(4 rows)
6236+
(5 rows)
62316237

62326238
INSERT INTO rw_view VALUES (0, 15); -- ok
62336239
SELECT * FROM foreign_tbl;
@@ -8923,7 +8929,7 @@ DO $d$
89238929
END;
89248930
$d$;
89258931
ERROR: invalid option "password"
8926-
HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size
8932+
HINT: Valid options in this context are: service, passfile, channel_binding, connect_timeout, dbname, host, hostaddr, port, options, application_name, keepalives, keepalives_idle, keepalives_interval, keepalives_count, tcp_user_timeout, sslmode, sslcompression, sslcert, sslkey, sslrootcert, sslcrl, requirepeer, ssl_min_protocol_version, ssl_max_protocol_version, gssencmode, krbsrvname, gsslib, target_session_attrs, use_remote_estimate, fdw_startup_cost, fdw_tuple_cost, extensions, updatable, fetch_size, batch_size
89278933
CONTEXT: SQL statement "ALTER SERVER loopback_nopw OPTIONS (ADD password 'dummypw')"
89288934
PL/pgSQL function inline_code_block line 3 at EXECUTE
89298935
-- If we add a password for our user mapping instead, we should get a different
@@ -9112,3 +9118,138 @@ SELECT * FROM postgres_fdw_get_connections() ORDER BY 1;
91129118
loopback2 | t
91139119
(1 row)
91149120

9121+
-- ===================================================================
9122+
-- batch insert
9123+
-- ===================================================================
9124+
BEGIN;
9125+
CREATE SERVER batch10 FOREIGN DATA WRAPPER postgres_fdw OPTIONS( batch_size '10' );
9126+
SELECT count(*)
9127+
FROM pg_foreign_server
9128+
WHERE srvname = 'batch10'
9129+
AND srvoptions @> array['batch_size=10'];
9130+
count
9131+
-------
9132+
1
9133+
(1 row)
9134+
9135+
ALTER SERVER batch10 OPTIONS( SET batch_size '20' );
9136+
SELECT count(*)
9137+
FROM pg_foreign_server
9138+
WHERE srvname = 'batch10'
9139+
AND srvoptions @> array['batch_size=10'];
9140+
count
9141+
-------
9142+
0
9143+
(1 row)
9144+
9145+
SELECT count(*)
9146+
FROM pg_foreign_server
9147+
WHERE srvname = 'batch10'
9148+
AND srvoptions @> array['batch_size=20'];
9149+
count
9150+
-------
9151+
1
9152+
(1 row)
9153+
9154+
CREATE FOREIGN TABLE table30 ( x int ) SERVER batch10 OPTIONS ( batch_size '30' );
9155+
SELECT COUNT(*)
9156+
FROM pg_foreign_table
9157+
WHERE ftrelid = 'table30'::regclass
9158+
AND ftoptions @> array['batch_size=30'];
9159+
count
9160+
-------
9161+
1
9162+
(1 row)
9163+
9164+
ALTER FOREIGN TABLE table30 OPTIONS ( SET batch_size '40');
9165+
SELECT COUNT(*)
9166+
FROM pg_foreign_table
9167+
WHERE ftrelid = 'table30'::regclass
9168+
AND ftoptions @> array['batch_size=30'];
9169+
count
9170+
-------
9171+
0
9172+
(1 row)
9173+
9174+
SELECT COUNT(*)
9175+
FROM pg_foreign_table
9176+
WHERE ftrelid = 'table30'::regclass
9177+
AND ftoptions @> array['batch_size=40'];
9178+
count
9179+
-------
9180+
1
9181+
(1 row)
9182+
9183+
ROLLBACK;
9184+
CREATE TABLE batch_table ( x int );
9185+
CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '10' );
9186+
EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable SELECT * FROM generate_series(1, 10) i;
9187+
QUERY PLAN
9188+
-------------------------------------------------------------
9189+
Insert on public.ftable
9190+
Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1)
9191+
Batch Size: 10
9192+
-> Function Scan on pg_catalog.generate_series i
9193+
Output: i.i
9194+
Function Call: generate_series(1, 10)
9195+
(6 rows)
9196+
9197+
INSERT INTO ftable SELECT * FROM generate_series(1, 10) i;
9198+
INSERT INTO ftable SELECT * FROM generate_series(11, 31) i;
9199+
INSERT INTO ftable VALUES (32);
9200+
INSERT INTO ftable VALUES (33), (34);
9201+
SELECT COUNT(*) FROM ftable;
9202+
count
9203+
-------
9204+
34
9205+
(1 row)
9206+
9207+
TRUNCATE batch_table;
9208+
DROP FOREIGN TABLE ftable;
9209+
-- Disable batch insert
9210+
CREATE FOREIGN TABLE ftable ( x int ) SERVER loopback OPTIONS ( table_name 'batch_table', batch_size '1' );
9211+
EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO ftable VALUES (1), (2);
9212+
QUERY PLAN
9213+
-------------------------------------------------------------
9214+
Insert on public.ftable
9215+
Remote SQL: INSERT INTO public.batch_table(x) VALUES ($1)
9216+
Batch Size: 1
9217+
-> Values Scan on "*VALUES*"
9218+
Output: "*VALUES*".column1
9219+
(5 rows)
9220+
9221+
INSERT INTO ftable VALUES (1), (2);
9222+
SELECT COUNT(*) FROM ftable;
9223+
count
9224+
-------
9225+
2
9226+
(1 row)
9227+
9228+
DROP FOREIGN TABLE ftable;
9229+
DROP TABLE batch_table;
9230+
-- Use partitioning
9231+
CREATE TABLE batch_table ( x int ) PARTITION BY HASH (x);
9232+
CREATE TABLE batch_table_p0 (LIKE batch_table);
9233+
CREATE FOREIGN TABLE batch_table_p0f
9234+
PARTITION OF batch_table
9235+
FOR VALUES WITH (MODULUS 3, REMAINDER 0)
9236+
SERVER loopback
9237+
OPTIONS (table_name 'batch_table_p0', batch_size '10');
9238+
CREATE TABLE batch_table_p1 (LIKE batch_table);
9239+
CREATE FOREIGN TABLE batch_table_p1f
9240+
PARTITION OF batch_table
9241+
FOR VALUES WITH (MODULUS 3, REMAINDER 1)
9242+
SERVER loopback
9243+
OPTIONS (table_name 'batch_table_p1', batch_size '1');
9244+
CREATE TABLE batch_table_p2
9245+
PARTITION OF batch_table
9246+
FOR VALUES WITH (MODULUS 3, REMAINDER 2);
9247+
INSERT INTO batch_table SELECT * FROM generate_series(1, 66) i;
9248+
SELECT COUNT(*) FROM batch_table;
9249+
count
9250+
-------
9251+
66
9252+
(1 row)
9253+
9254+
-- Clean up
9255+
DROP TABLE batch_table CASCADE;

contrib/postgres_fdw/option.c

+14
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,17 @@ postgres_fdw_validator(PG_FUNCTION_ARGS)
142142
errmsg("%s requires a non-negative integer value",
143143
def->defname)));
144144
}
145+
else if (strcmp(def->defname, "batch_size") == 0)
146+
{
147+
int batch_size;
148+
149+
batch_size = strtol(defGetString(def), NULL, 10);
150+
if (batch_size <= 0)
151+
ereport(ERROR,
152+
(errcode(ERRCODE_SYNTAX_ERROR),
153+
errmsg("%s requires a non-negative integer value",
154+
def->defname)));
155+
}
145156
else if (strcmp(def->defname, "password_required") == 0)
146157
{
147158
bool pw_required = defGetBoolean(def);
@@ -203,6 +214,9 @@ InitPgFdwOptions(void)
203214
/* fetch_size is available on both server and table */
204215
{"fetch_size", ForeignServerRelationId, false},
205216
{"fetch_size", ForeignTableRelationId, false},
217+
/* batch_size is available on both server and table */
218+
{"batch_size", ForeignServerRelationId, false},
219+
{"batch_size", ForeignTableRelationId, false},
206220
{"password_required", UserMappingRelationId, false},
207221

208222
/*

0 commit comments

Comments
 (0)