Skip to content

Commit 959b38d

Browse files
committed
Invent --transaction-size option for pg_restore.
This patch allows pg_restore to wrap its commands into transaction blocks, somewhat like --single-transaction, except that we commit and start a new block after every N objects. Using this mode with a size limit of 1000 or so objects greatly reduces the number of transactions consumed by the restore, while preventing any one transaction from taking enough locks to overrun the receiving server's shared lock table. (A value of 1000 works well with the default lock table size of around 6400 locks. Higher --transaction-size values can be used if one has increased the receiving server's lock table size.) Excessive consumption of XIDs has been reported as a problem for pg_upgrade in particular, but it could be bad for any restore; and the change also reduces the number of fsyncs and amount of WAL generated, so it should provide speed benefits too. This patch does not try to make parallel workers batch the SQL commands they issue. The trouble with doing that is that other workers may need to see the objects a worker creates right away. Possibly this can be improved later. In this patch I have hard-wired pg_upgrade to use a transaction size of 1000 divided by the number of parallel restore jobs allowed (without that, we'd still be at risk of overrunning the shared lock table). Perhaps there would be value in adding another pg_upgrade option to allow user control of that, but I'm unsure that it's worth the trouble; I think few users would use it, and any who did would see not that much benefit compared to the default. Patch by me, but the original idea to batch SQL commands during restore is due to Robins Tharakan. Discussion: https://postgr.es/m/[email protected]
1 parent a45c78e commit 959b38d

File tree

7 files changed

+220
-8
lines changed

7 files changed

+220
-8
lines changed

doc/src/sgml/ref/pg_restore.sgml

+24
Original file line number | Diff line number | Diff line change
@@ -786,6 +786,30 @@ PostgreSQL documentation
786786
</listitem>
787787
</varlistentry>
788788

789+
<varlistentry>
790+
<term><option>--transaction-size=<replaceable class="parameter">N</replaceable></option></term>
791+
<listitem>
792+
<para>
793+
Execute the restore as a series of transactions, each processing
794+
up to <replaceable class="parameter">N</replaceable> database
795+
objects. This option implies <option>--exit-on-error</option>.
796+
</para>
797+
<para>
798+
<option>--transaction-size</option> offers an intermediate choice
799+
between the default behavior (one transaction per SQL command)
800+
and <option>-1</option>/<option>--single-transaction</option>
801+
(one transaction for all restored objects).
802+
While <option>--single-transaction</option> has the least
803+
overhead, it may be impractical for large databases because the
804+
transaction will take a lock on each restored object, possibly
805+
exhausting the server's lock table space.
806+
Using <option>--transaction-size</option> with a size of a few
807+
thousand objects offers nearly the same performance benefits while
808+
capping the amount of lock table space needed.
809+
</para>
810+
</listitem>
811+
</varlistentry>
812+
789813
<varlistentry>
790814
<term><option>--use-set-session-authorization</option></term>
791815
<listitem>

src/bin/pg_dump/pg_backup.h

+3-1
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,9 @@ typedef struct _restoreOptions
149149
* compression */
150150
int suppressDumpWarnings; /* Suppress output of WARNING entries
151151
* to stderr */
152-
bool single_txn;
152+
153+
bool single_txn; /* restore all TOCs in one transaction */
154+
int txn_size; /* restore this many TOCs per txn, if > 0 */
153155

154156
bool *idWanted; /* array showing which dump IDs to emit */
155157
int enable_row_security;

src/bin/pg_dump/pg_backup_archiver.c

+133-6
Original file line number | Diff line number | Diff line change
@@ -502,7 +502,28 @@ RestoreArchive(Archive *AHX)
502502
/* Otherwise, drop anything that's selected and has a dropStmt */
503503
if (((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0) && te->dropStmt)
504504
{
505+
bool not_allowed_in_txn = false;
506+
505507
pg_log_info("dropping %s %s", te->desc, te->tag);
508+
509+
/*
510+
* In --transaction-size mode, we have to temporarily exit our
511+
* transaction block to drop objects that can't be dropped
512+
* within a transaction.
513+
*/
514+
if (ropt->txn_size > 0)
515+
{
516+
if (strcmp(te->desc, "DATABASE") == 0 ||
517+
strcmp(te->desc, "DATABASE PROPERTIES") == 0)
518+
{
519+
not_allowed_in_txn = true;
520+
if (AH->connection)
521+
CommitTransaction(AHX);
522+
else
523+
ahprintf(AH, "COMMIT;\n");
524+
}
525+
}
526+
506527
/* Select owner and schema as necessary */
507528
_becomeOwner(AH, te);
508529
_selectOutputSchema(AH, te->namespace);
@@ -628,6 +649,33 @@ RestoreArchive(Archive *AHX)
628649
}
629650
}
630651
}
652+
653+
/*
654+
* In --transaction-size mode, re-establish the transaction
655+
* block if needed; otherwise, commit after every N drops.
656+
*/
657+
if (ropt->txn_size > 0)
658+
{
659+
if (not_allowed_in_txn)
660+
{
661+
if (AH->connection)
662+
StartTransaction(AHX);
663+
else
664+
ahprintf(AH, "BEGIN;\n");
665+
AH->txnCount = 0;
666+
}
667+
else if (++AH->txnCount >= ropt->txn_size)
668+
{
669+
if (AH->connection)
670+
{
671+
CommitTransaction(AHX);
672+
StartTransaction(AHX);
673+
}
674+
else
675+
ahprintf(AH, "COMMIT;\nBEGIN;\n");
676+
AH->txnCount = 0;
677+
}
678+
}
631679
}
632680
}
633681

@@ -724,7 +772,11 @@ RestoreArchive(Archive *AHX)
724772
}
725773
}
726774

727-
if (ropt->single_txn)
775+
/*
776+
* Close out any persistent transaction we may have. While these two
777+
* cases are started in different places, we can end both cases here.
778+
*/
779+
if (ropt->single_txn || ropt->txn_size > 0)
728780
{
729781
if (AH->connection)
730782
CommitTransaction(AHX);
@@ -785,6 +837,25 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
785837
*/
786838
if ((reqs & REQ_SCHEMA) != 0)
787839
{
840+
bool object_is_db = false;
841+
842+
/*
843+
* In --transaction-size mode, must exit our transaction block to
844+
* create a database or set its properties.
845+
*/
846+
if (strcmp(te->desc, "DATABASE") == 0 ||
847+
strcmp(te->desc, "DATABASE PROPERTIES") == 0)
848+
{
849+
object_is_db = true;
850+
if (ropt->txn_size > 0)
851+
{
852+
if (AH->connection)
853+
CommitTransaction(&AH->public);
854+
else
855+
ahprintf(AH, "COMMIT;\n\n");
856+
}
857+
}
858+
788859
/* Show namespace in log message if available */
789860
if (te->namespace)
790861
pg_log_info("creating %s \"%s.%s\"",
@@ -835,10 +906,10 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
835906
/*
836907
* If we created a DB, connect to it. Also, if we changed DB
837908
* properties, reconnect to ensure that relevant GUC settings are
838-
* applied to our session.
909+
* applied to our session. (That also restarts the transaction block
910+
* in --transaction-size mode.)
839911
*/
840-
if (strcmp(te->desc, "DATABASE") == 0 ||
841-
strcmp(te->desc, "DATABASE PROPERTIES") == 0)
912+
if (object_is_db)
842913
{
843914
pg_log_info("connecting to new database \"%s\"", te->tag);
844915
_reconnectToDB(AH, te->tag);
@@ -964,6 +1035,25 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
9641035
}
9651036
}
9661037

1038+
/*
1039+
* If we emitted anything for this TOC entry, that counts as one action
1040+
* against the transaction-size limit. Commit if it's time to.
1041+
*/
1042+
if ((reqs & (REQ_SCHEMA | REQ_DATA)) != 0 && ropt->txn_size > 0)
1043+
{
1044+
if (++AH->txnCount >= ropt->txn_size)
1045+
{
1046+
if (AH->connection)
1047+
{
1048+
CommitTransaction(&AH->public);
1049+
StartTransaction(&AH->public);
1050+
}
1051+
else
1052+
ahprintf(AH, "COMMIT;\nBEGIN;\n\n");
1053+
AH->txnCount = 0;
1054+
}
1055+
}
1056+
9671057
if (AH->public.n_errors > 0 && status == WORKER_OK)
9681058
status = WORKER_IGNORED_ERRORS;
9691059

@@ -1310,7 +1400,12 @@ StartRestoreLOs(ArchiveHandle *AH)
13101400
{
13111401
RestoreOptions *ropt = AH->public.ropt;
13121402

1313-
if (!ropt->single_txn)
1403+
/*
1404+
* LOs must be restored within a transaction block, since we need the LO
1405+
* handle to stay open while we write it. Establish a transaction unless
1406+
* there's one being used globally.
1407+
*/
1408+
if (!(ropt->single_txn || ropt->txn_size > 0))
13141409
{
13151410
if (AH->connection)
13161411
StartTransaction(&AH->public);
@@ -1329,7 +1424,7 @@ EndRestoreLOs(ArchiveHandle *AH)
13291424
{
13301425
RestoreOptions *ropt = AH->public.ropt;
13311426

1332-
if (!ropt->single_txn)
1427+
if (!(ropt->single_txn || ropt->txn_size > 0))
13331428
{
13341429
if (AH->connection)
13351430
CommitTransaction(&AH->public);
@@ -3171,6 +3266,19 @@ _doSetFixedOutputState(ArchiveHandle *AH)
31713266
else
31723267
ahprintf(AH, "SET row_security = off;\n");
31733268

3269+
/*
3270+
* In --transaction-size mode, we should always be in a transaction when
3271+
* we begin to restore objects.
3272+
*/
3273+
if (ropt && ropt->txn_size > 0)
3274+
{
3275+
if (AH->connection)
3276+
StartTransaction(&AH->public);
3277+
else
3278+
ahprintf(AH, "\nBEGIN;\n");
3279+
AH->txnCount = 0;
3280+
}
3281+
31743282
ahprintf(AH, "\n");
31753283
}
31763284

@@ -4043,6 +4151,14 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
40434151
}
40444152
}
40454153

4154+
/*
4155+
* In --transaction-size mode, we must commit the open transaction before
4156+
* dropping the database connection. This also ensures that child workers
4157+
* can see the objects we've created so far.
4158+
*/
4159+
if (AH->public.ropt->txn_size > 0)
4160+
CommitTransaction(&AH->public);
4161+
40464162
/*
40474163
* Now close parent connection in prep for parallel steps. We do this
40484164
* mainly to ensure that we don't exceed the specified number of parallel
@@ -4782,6 +4898,10 @@ CloneArchive(ArchiveHandle *AH)
47824898
clone = (ArchiveHandle *) pg_malloc(sizeof(ArchiveHandle));
47834899
memcpy(clone, AH, sizeof(ArchiveHandle));
47844900

4901+
/* Likewise flat-copy the RestoreOptions, so we can alter them locally */
4902+
clone->public.ropt = (RestoreOptions *) pg_malloc(sizeof(RestoreOptions));
4903+
memcpy(clone->public.ropt, AH->public.ropt, sizeof(RestoreOptions));
4904+
47854905
/* Handle format-independent fields */
47864906
memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));
47874907

@@ -4803,6 +4923,13 @@ CloneArchive(ArchiveHandle *AH)
48034923
/* clones should not share lo_buf */
48044924
clone->lo_buf = NULL;
48054925

4926+
/*
4927+
* Clone connections disregard --transaction-size; they must commit after
4928+
* each command so that the results are immediately visible to other
4929+
* workers.
4930+
*/
4931+
clone->public.ropt->txn_size = 0;
4932+
48064933
/*
48074934
* Connect our new clone object to the database, using the same connection
48084935
* parameters used for the original connection.

src/bin/pg_dump/pg_backup_archiver.h

+3
Original file line number | Diff line number | Diff line change
@@ -324,6 +324,9 @@ struct _archiveHandle
324324
char *currTablespace; /* current tablespace, or NULL */
325325
char *currTableAm; /* current table access method, or NULL */
326326

327+
/* in --transaction-size mode, this counts objects emitted in cur xact */
328+
int txnCount;
329+
327330
void *lo_buf;
328331
size_t lo_buf_used;
329332
size_t lo_buf_size;

src/bin/pg_dump/pg_backup_db.c

+18
Original file line number | Diff line number | Diff line change
@@ -554,6 +554,7 @@ IssueCommandPerBlob(ArchiveHandle *AH, TocEntry *te,
554554
{
555555
/* Make a writable copy of the command string */
556556
char *buf = pg_strdup(te->defn);
557+
RestoreOptions *ropt = AH->public.ropt;
557558
char *st;
558559
char *en;
559560

@@ -562,6 +563,23 @@ IssueCommandPerBlob(ArchiveHandle *AH, TocEntry *te,
562563
{
563564
*en++ = '\0';
564565
ahprintf(AH, "%s%s%s;\n", cmdBegin, st, cmdEnd);
566+
567+
/* In --transaction-size mode, count each command as an action */
568+
if (ropt && ropt->txn_size > 0)
569+
{
570+
if (++AH->txnCount >= ropt->txn_size)
571+
{
572+
if (AH->connection)
573+
{
574+
CommitTransaction(&AH->public);
575+
StartTransaction(&AH->public);
576+
}
577+
else
578+
ahprintf(AH, "COMMIT;\nBEGIN;\n\n");
579+
AH->txnCount = 0;
580+
}
581+
}
582+
565583
st = en;
566584
}
567585
ahprintf(AH, "\n");

src/bin/pg_dump/pg_restore.c

+14-1
Original file line number | Diff line number | Diff line change
@@ -120,6 +120,7 @@ main(int argc, char **argv)
120120
{"role", required_argument, NULL, 2},
121121
{"section", required_argument, NULL, 3},
122122
{"strict-names", no_argument, &strict_names, 1},
123+
{"transaction-size", required_argument, NULL, 5},
123124
{"use-set-session-authorization", no_argument, &use_setsessauth, 1},
124125
{"no-comments", no_argument, &no_comments, 1},
125126
{"no-publications", no_argument, &no_publications, 1},
@@ -289,10 +290,18 @@ main(int argc, char **argv)
289290
set_dump_section(optarg, &(opts->dumpSections));
290291
break;
291292

292-
case 4:
293+
case 4: /* filter */
293294
read_restore_filters(optarg, opts);
294295
break;
295296

297+
case 5: /* transaction-size */
298+
if (!option_parse_int(optarg, "--transaction-size",
299+
1, INT_MAX,
300+
&opts->txn_size))
301+
exit(1);
302+
opts->exit_on_error = true;
303+
break;
304+
296305
default:
297306
/* getopt_long already emitted a complaint */
298307
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -337,6 +346,9 @@ main(int argc, char **argv)
337346
if (opts->dataOnly && opts->dropSchema)
338347
pg_fatal("options -c/--clean and -a/--data-only cannot be used together");
339348

349+
if (opts->single_txn && opts->txn_size > 0)
350+
pg_fatal("options -1/--single-transaction and --transaction-size cannot be used together");
351+
340352
/*
341353
* -C is not compatible with -1, because we can't create a database inside
342354
* a transaction block.
@@ -484,6 +496,7 @@ usage(const char *progname)
484496
printf(_(" --section=SECTION restore named section (pre-data, data, or post-data)\n"));
485497
printf(_(" --strict-names require table and/or schema include patterns to\n"
486498
" match at least one entity each\n"));
499+
printf(_(" --transaction-size=N commit after every N objects\n"));
487500
printf(_(" --use-set-session-authorization\n"
488501
" use SET SESSION AUTHORIZATION commands instead of\n"
489502
" ALTER OWNER commands to set ownership\n"));

0 commit comments

Comments
 (0)