Skip to content

Commit 3eb9a5e

Browse files
committed Aug 3, 2017
Fix pg_dump/pg_restore to emit REFRESH MATERIALIZED VIEW commands last.
Because we push all ACL (i.e. GRANT/REVOKE) restore steps to the end, materialized view refreshes were occurring while the permissions on referenced objects were still at defaults. This led to failures if, say, an MV owned by user A reads from a table owned by user B, even if B had granted the necessary privileges to A. We've had multiple complaints about that type of restore failure, most recently from Jordan Gigov. The ideal fix for this would be to start treating ACLs as dependency-sortable objects, rather than hard-wiring anything about their dump order (the existing approach is a messy kluge dating to commit dc0e76c). But that's going to be a rather major change, and it certainly wouldn't lead to a back-patchable fix. As a short-term solution, convert the existing two-pass hack (ie, normal objects then ACLs) to a three-pass hack, ie, normal objects then ACLs then matview refreshes. Because this happens in RestoreArchive(), it will also fix the problem when restoring from an existing archive-format dump. (Note this means that if a matview refresh would have failed under the permissions prevailing at dump time, it'll fail during restore as well. We'll define that as user error rather than something we should try to work around.) To avoid performance loss in parallel restore, we need the matview refreshes to still be parallelizable. Hence, clean things up enough so that both ACLs and matviews are handled by the parallel restore infrastructure, instead of reverting back to serial restore for ACLs. There is still a final serial step, but it shouldn't normally have to do anything; it's only there to try to recover if we get stuck due to some problem like unresolved circular dependencies. Patch by me, but it owes something to an earlier attempt by Kevin Grittner. Back-patch to 9.3 where materialized views were introduced. Discussion: https://postgr.es/m/28572.1500912583@sss.pgh.pa.us

File tree

2 files changed

+264
-129
lines changed

2 files changed

+264
-129
lines changed
 

‎src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 239 additions & 129 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
5858
SetupWorkerPtrType setupWorkerPtr);
5959
static void _getObjectDescription(PQExpBuffer buf, TocEntry *te,
6060
ArchiveHandle *AH);
61-
static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass);
61+
static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
6262
static char *replace_line_endings(const char *str);
6363
static void _doSetFixedOutputState(ArchiveHandle *AH);
6464
static void _doSetSessionAuth(ArchiveHandle *AH, const char *user);
@@ -71,6 +71,7 @@ static void _selectTablespace(ArchiveHandle *AH, const char *tablespace);
7171
static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te);
7272
static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te);
7373
static teReqs _tocEntryRequired(TocEntry *te, teSection curSection, RestoreOptions *ropt);
74+
static RestorePass _tocEntryRestorePass(TocEntry *te);
7475
static bool _tocEntryIsACL(TocEntry *te);
7576
static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
7677
static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
@@ -86,13 +87,18 @@ static OutputContext SaveOutput(ArchiveHandle *AH);
8687
static void RestoreOutput(ArchiveHandle *AH, OutputContext savedContext);
8788

8889
static int restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel);
89-
static void restore_toc_entries_prefork(ArchiveHandle *AH);
90-
static void restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
90+
static void restore_toc_entries_prefork(ArchiveHandle *AH,
91+
TocEntry *pending_list);
92+
static void restore_toc_entries_parallel(ArchiveHandle *AH,
93+
ParallelState *pstate,
94+
TocEntry *pending_list);
95+
static void restore_toc_entries_postfork(ArchiveHandle *AH,
9196
TocEntry *pending_list);
92-
static void restore_toc_entries_postfork(ArchiveHandle *AH, TocEntry *pending_list);
9397
static void par_list_header_init(TocEntry *l);
9498
static void par_list_append(TocEntry *l, TocEntry *te);
9599
static void par_list_remove(TocEntry *te);
100+
static void move_to_ready_list(TocEntry *pending_list, TocEntry *ready_list,
101+
RestorePass pass);
96102
static TocEntry *get_next_work_item(ArchiveHandle *AH,
97103
TocEntry *ready_list,
98104
ParallelState *pstate);
@@ -625,20 +631,18 @@ RestoreArchive(Archive *AHX)
625631
AH->currSchema = NULL;
626632
}
627633

628-
/*
629-
* In serial mode, we now process each non-ACL TOC entry.
630-
*
631-
* In parallel mode, turn control over to the parallel-restore logic.
632-
*/
633634
if (parallel_mode)
634635
{
636+
/*
637+
* In parallel mode, turn control over to the parallel-restore logic.
638+
*/
635639
ParallelState *pstate;
636640
TocEntry pending_list;
637641

638642
par_list_header_init(&pending_list);
639643

640644
/* This runs PRE_DATA items and then disconnects from the database */
641-
restore_toc_entries_prefork(AH);
645+
restore_toc_entries_prefork(AH, &pending_list);
642646
Assert(AH->connection == NULL);
643647

644648
/* ParallelBackupStart() will actually fork the processes */
@@ -652,28 +656,51 @@ RestoreArchive(Archive *AHX)
652656
}
653657
else
654658
{
659+
/*
660+
* In serial mode, process everything in three phases: normal items,
661+
* then ACLs, then matview refresh items. We might be able to skip
662+
* one or both extra phases in some cases, eg data-only restores.
663+
*/
664+
bool haveACL = false;
665+
bool haveRefresh = false;
666+
655667
for (te = AH->toc->next; te != AH->toc; te = te->next)
656-
(void) restore_toc_entry(AH, te, false);
657-
}
668+
{
669+
if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) == 0)
670+
continue; /* ignore if not to be dumped at all */
658671

659-
/*
660-
* Scan TOC again to output ownership commands and ACLs
661-
*/
662-
for (te = AH->toc->next; te != AH->toc; te = te->next)
663-
{
664-
AH->currentTE = te;
672+
switch (_tocEntryRestorePass(te))
673+
{
674+
case RESTORE_PASS_MAIN:
675+
(void) restore_toc_entry(AH, te, false);
676+
break;
677+
case RESTORE_PASS_ACL:
678+
haveACL = true;
679+
break;
680+
case RESTORE_PASS_REFRESH:
681+
haveRefresh = true;
682+
break;
683+
}
684+
}
665685

666-
/* Both schema and data objects might now have ownership/ACLs */
667-
if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0)
686+
if (haveACL)
668687
{
669-
/* Show namespace if available */
670-
if (te->namespace)
671-
ahlog(AH, 1, "setting owner and privileges for %s \"%s.%s\"\n",
672-
te->desc, te->namespace, te->tag);
673-
else
674-
ahlog(AH, 1, "setting owner and privileges for %s \"%s\"\n",
675-
te->desc, te->tag);
676-
_printTocEntry(AH, te, false, true);
688+
for (te = AH->toc->next; te != AH->toc; te = te->next)
689+
{
690+
if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0 &&
691+
_tocEntryRestorePass(te) == RESTORE_PASS_ACL)
692+
(void) restore_toc_entry(AH, te, false);
693+
}
694+
}
695+
696+
if (haveRefresh)
697+
{
698+
for (te = AH->toc->next; te != AH->toc; te = te->next)
699+
{
700+
if ((te->reqs & (REQ_SCHEMA | REQ_DATA)) != 0 &&
701+
_tocEntryRestorePass(te) == RESTORE_PASS_REFRESH)
702+
(void) restore_toc_entry(AH, te, false);
703+
}
677704
}
678705
}
679706

@@ -720,10 +747,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
720747
AH->currentTE = te;
721748

722749
/* Work out what, if anything, we want from this entry */
723-
if (_tocEntryIsACL(te))
724-
reqs = 0; /* ACLs are never restored here */
725-
else
726-
reqs = te->reqs;
750+
reqs = te->reqs;
727751

728752
/*
729753
* Ignore DATABASE entry unless we should create it. We must check this
@@ -744,17 +768,19 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
744768

745769
defnDumped = false;
746770

747-
if ((reqs & REQ_SCHEMA) != 0) /* We want the schema */
771+
/*
772+
* If it has a schema component that we want, then process that
773+
*/
774+
if ((reqs & REQ_SCHEMA) != 0)
748775
{
749-
/* Show namespace if available */
776+
/* Show namespace in log message if available */
750777
if (te->namespace)
751778
ahlog(AH, 1, "creating %s \"%s.%s\"\n",
752779
te->desc, te->namespace, te->tag);
753780
else
754781
ahlog(AH, 1, "creating %s \"%s\"\n", te->desc, te->tag);
755782

756-
757-
_printTocEntry(AH, te, false, false);
783+
_printTocEntry(AH, te, false);
758784
defnDumped = true;
759785

760786
if (strcmp(te->desc, "TABLE") == 0)
@@ -810,7 +836,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
810836
}
811837

812838
/*
813-
* If we have a data component, then process it
839+
* If it has a data component that we want, then process that
814840
*/
815841
if ((reqs & REQ_DATA) != 0)
816842
{
@@ -826,7 +852,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
826852
*/
827853
if (AH->PrintTocDataPtr != NULL)
828854
{
829-
_printTocEntry(AH, te, true, false);
855+
_printTocEntry(AH, te, true);
830856

831857
if (strcmp(te->desc, "BLOBS") == 0 ||
832858
strcmp(te->desc, "BLOB COMMENTS") == 0)
@@ -914,7 +940,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
914940
{
915941
/* If we haven't already dumped the defn part, do so now */
916942
ahlog(AH, 1, "executing %s %s\n", te->desc, te->tag);
917-
_printTocEntry(AH, te, false, false);
943+
_printTocEntry(AH, te, false);
918944
}
919945
}
920946

@@ -2943,8 +2969,30 @@ _tocEntryRequired(TocEntry *te, teSection curSection, RestoreOptions *ropt)
29432969
return res;
29442970
}
29452971

2972+
/*
2973+
* Identify which pass we should restore this TOC entry in.
2974+
*
2975+
* See notes with the RestorePass typedef in pg_backup_archiver.h.
2976+
*/
2977+
static RestorePass
2978+
_tocEntryRestorePass(TocEntry *te)
2979+
{
2980+
/* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */
2981+
if (strcmp(te->desc, "ACL") == 0 ||
2982+
strcmp(te->desc, "ACL LANGUAGE") == 0 ||
2983+
strcmp(te->desc, "DEFAULT ACL") == 0)
2984+
return RESTORE_PASS_ACL;
2985+
if (strcmp(te->desc, "MATERIALIZED VIEW DATA") == 0)
2986+
return RESTORE_PASS_REFRESH;
2987+
return RESTORE_PASS_MAIN;
2988+
}
2989+
29462990
/*
29472991
* Identify TOC entries that are ACLs.
2992+
*
2993+
* Note: it seems worth duplicating some code here to avoid a hard-wired
2994+
* assumption that these are exactly the same entries that we restore during
2995+
* the RESTORE_PASS_ACL phase.
29482996
*/
29492997
static bool
29502998
_tocEntryIsACL(TocEntry *te)
@@ -3364,23 +3412,18 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH)
33643412
type);
33653413
}
33663414

3415+
/*
3416+
* Emit the SQL commands to create the object represented by a TOC entry
3417+
*
3418+
* This now also includes issuing an ALTER OWNER command to restore the
3419+
* object's ownership, if wanted. But note that the object's permissions
3420+
* will remain at default, until the matching ACL TOC entry is restored.
3421+
*/
33673422
static void
3368-
_printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
3423+
_printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData)
33693424
{
33703425
RestoreOptions *ropt = AH->public.ropt;
33713426

3372-
/* ACLs are dumped only during acl pass */
3373-
if (acl_pass)
3374-
{
3375-
if (!_tocEntryIsACL(te))
3376-
return;
3377-
}
3378-
else
3379-
{
3380-
if (_tocEntryIsACL(te))
3381-
return;
3382-
}
3383-
33843427
/*
33853428
* Avoid dumping the public schema, as it will already be created ...
33863429
* unless we are using --clean mode (and *not* --create mode), in which
@@ -3567,7 +3610,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass)
35673610
* If it's an ACL entry, it might contain SET SESSION AUTHORIZATION
35683611
* commands, so we can no longer assume we know the current auth setting.
35693612
*/
3570-
if (acl_pass)
3613+
if (_tocEntryIsACL(te))
35713614
{
35723615
if (AH->currUser)
35733616
free(AH->currUser);
@@ -3597,6 +3640,9 @@ replace_line_endings(const char *str)
35973640
return result;
35983641
}
35993642

3643+
/*
3644+
* Write the file header for a custom-format archive
3645+
*/
36003646
void
36013647
WriteHead(ArchiveHandle *AH)
36023648
{
@@ -3772,16 +3818,14 @@ dumpTimestamp(ArchiveHandle *AH, const char *msg, time_t tim)
37723818
/*
37733819
* Main engine for parallel restore.
37743820
*
3775-
* Work is done in three phases.
3776-
* First we process all SECTION_PRE_DATA tocEntries, in a single connection,
3777-
* just as for a standard restore. Second we process the remaining non-ACL
3778-
* steps in parallel worker children (threads on Windows, processes on Unix),
3779-
* each of which connects separately to the database. Finally we process all
3780-
* the ACL entries in a single connection (that happens back in
3781-
* RestoreArchive).
3821+
* Parallel restore is done in three phases. In this first phase,
3822+
* we'll process all SECTION_PRE_DATA TOC entries that are allowed to be
3823+
* processed in the RESTORE_PASS_MAIN pass. (In practice, that's all
3824+
* PRE_DATA items other than ACLs.) Entries we can't process now are
3825+
* added to the pending_list for later phases to deal with.
37823826
*/
37833827
static void
3784-
restore_toc_entries_prefork(ArchiveHandle *AH)
3828+
restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list)
37853829
{
37863830
bool skipped_some;
37873831
TocEntry *next_work_item;
@@ -3799,23 +3843,31 @@ restore_toc_entries_prefork(ArchiveHandle *AH)
37993843
* about showing all the dependencies of SECTION_PRE_DATA items, so we do
38003844
* not risk trying to process them out-of-order.
38013845
*
3846+
* Stuff that we can't do immediately gets added to the pending_list.
3847+
* Note: we don't yet filter out entries that aren't going to be restored.
3848+
* They might participate in dependency chains connecting entries that
3849+
* should be restored, so we treat them as live until we actually process
3850+
* them.
3851+
*
38023852
* Note: as of 9.2, it should be guaranteed that all PRE_DATA items appear
38033853
* before DATA items, and all DATA items before POST_DATA items. That is
38043854
* not certain to be true in older archives, though, so this loop is coded
38053855
* to not assume it.
38063856
*/
3857+
AH->restorePass = RESTORE_PASS_MAIN;
38073858
skipped_some = false;
38083859
for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
38093860
{
3810-
/* NB: process-or-continue logic must be the inverse of loop below */
3861+
bool do_now = true;
3862+
38113863
if (next_work_item->section != SECTION_PRE_DATA)
38123864
{
38133865
/* DATA and POST_DATA items are just ignored for now */
38143866
if (next_work_item->section == SECTION_DATA ||
38153867
next_work_item->section == SECTION_POST_DATA)
38163868
{
3869+
do_now = false;
38173870
skipped_some = true;
3818-
continue;
38193871
}
38203872
else
38213873
{
@@ -3826,18 +3878,35 @@ restore_toc_entries_prefork(ArchiveHandle *AH)
38263878
* comment's dependencies are satisfied, so skip it for now.
38273879
*/
38283880
if (skipped_some)
3829-
continue;
3881+
do_now = false;
38303882
}
38313883
}
38323884

3833-
ahlog(AH, 1, "processing item %d %s %s\n",
3834-
next_work_item->dumpId,
3835-
next_work_item->desc, next_work_item->tag);
3885+
/*
3886+
* Also skip items that need to be forced into later passes. We need
3887+
* not set skipped_some in this case, since by assumption no main-pass
3888+
* items could depend on these.
3889+
*/
3890+
if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN)
3891+
do_now = false;
3892+
3893+
if (do_now)
3894+
{
3895+
/* OK, restore the item and update its dependencies */
3896+
ahlog(AH, 1, "processing item %d %s %s\n",
3897+
next_work_item->dumpId,
3898+
next_work_item->desc, next_work_item->tag);
38363899

3837-
(void) restore_toc_entry(AH, next_work_item, false);
3900+
(void) restore_toc_entry(AH, next_work_item, false);
38383901

3839-
/* there should be no touch of ready_list here, so pass NULL */
3840-
reduce_dependencies(AH, next_work_item, NULL);
3902+
/* there should be no touch of ready_list here, so pass NULL */
3903+
reduce_dependencies(AH, next_work_item, NULL);
3904+
}
3905+
else
3906+
{
3907+
/* Nope, so add it to pending_list */
3908+
par_list_append(pending_list, next_work_item);
3909+
}
38413910
}
38423911

38433912
/*
@@ -3863,104 +3932,95 @@ restore_toc_entries_prefork(ArchiveHandle *AH)
38633932
/*
38643933
* Main engine for parallel restore.
38653934
*
3866-
* Work is done in three phases.
3867-
* First we process all SECTION_PRE_DATA tocEntries, in a single connection,
3868-
* just as for a standard restore. This is done in restore_toc_entries_prefork().
3869-
* Second we process the remaining non-ACL steps in parallel worker children
3870-
* (threads on Windows, processes on Unix), these fork off and set up their
3871-
* connections before we call restore_toc_entries_parallel_forked.
3872-
* Finally we process all the ACL entries in a single connection (that happens
3873-
* back in RestoreArchive).
3935+
* Parallel restore is done in three phases. In this second phase,
3936+
* we process entries by dispatching them to parallel worker children
3937+
* (processes on Unix, threads on Windows), each of which connects
3938+
* separately to the database. Inter-entry dependencies are respected,
3939+
* and so is the RestorePass multi-pass structure. When we can no longer
3940+
* make any entries ready to process, we exit. Normally, there will be
3941+
* nothing left to do; but if there is, the third phase will mop up.
38743942
*/
38753943
static void
38763944
restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
38773945
TocEntry *pending_list)
38783946
{
3879-
bool skipped_some;
38803947
TocEntry ready_list;
38813948
TocEntry *next_work_item;
38823949

38833950
ahlog(AH, 2, "entering restore_toc_entries_parallel\n");
38843951

38853952
/*
3886-
* Initialize the lists of ready items, the list for pending items has
3887-
* already been initialized in the caller. After this setup, the pending
3888-
* list is everything that needs to be done but is blocked by one or more
3889-
* dependencies, while the ready list contains items that have no
3890-
* remaining dependencies. Note: we don't yet filter out entries that
3891-
* aren't going to be restored. They might participate in dependency
3892-
* chains connecting entries that should be restored, so we treat them as
3893-
* live until we actually process them.
3953+
* The pending_list contains all items that we need to restore. Move all
3954+
* items that are available to process immediately into the ready_list.
3955+
* After this setup, the pending list is everything that needs to be done
3956+
* but is blocked by one or more dependencies, while the ready list
3957+
* contains items that have no remaining dependencies and are OK to
3958+
* process in the current restore pass.
38943959
*/
38953960
par_list_header_init(&ready_list);
3896-
skipped_some = false;
3897-
for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
3898-
{
3899-
/* NB: process-or-continue logic must be the inverse of loop above */
3900-
if (next_work_item->section == SECTION_PRE_DATA)
3901-
{
3902-
/* All PRE_DATA items were dealt with above */
3903-
continue;
3904-
}
3905-
if (next_work_item->section == SECTION_DATA ||
3906-
next_work_item->section == SECTION_POST_DATA)
3907-
{
3908-
/* set this flag at same point that previous loop did */
3909-
skipped_some = true;
3910-
}
3911-
else
3912-
{
3913-
/* SECTION_NONE items must be processed if previous loop didn't */
3914-
if (!skipped_some)
3915-
continue;
3916-
}
3917-
3918-
if (next_work_item->depCount > 0)
3919-
par_list_append(pending_list, next_work_item);
3920-
else
3921-
par_list_append(&ready_list, next_work_item);
3922-
}
3961+
AH->restorePass = RESTORE_PASS_MAIN;
3962+
move_to_ready_list(pending_list, &ready_list, AH->restorePass);
39233963

39243964
/*
39253965
* main parent loop
39263966
*
39273967
* Keep going until there is no worker still running AND there is no work
3928-
* left to be done.
3968+
* left to be done. Note invariant: at top of loop, there should always
3969+
* be at least one worker available to dispatch a job to.
39293970
*/
3930-
39313971
ahlog(AH, 1, "entering main parallel loop\n");
39323972

3933-
while ((next_work_item = get_next_work_item(AH, &ready_list, pstate)) != NULL ||
3934-
!IsEveryWorkerIdle(pstate))
3973+
for (;;)
39353974
{
3975+
/* Look for an item ready to be dispatched to a worker */
3976+
next_work_item = get_next_work_item(AH, &ready_list, pstate);
39363977
if (next_work_item != NULL)
39373978
{
39383979
/* If not to be restored, don't waste time launching a worker */
3939-
if ((next_work_item->reqs & (REQ_SCHEMA | REQ_DATA)) == 0 ||
3940-
_tocEntryIsACL(next_work_item))
3980+
if ((next_work_item->reqs & (REQ_SCHEMA | REQ_DATA)) == 0)
39413981
{
39423982
ahlog(AH, 1, "skipping item %d %s %s\n",
39433983
next_work_item->dumpId,
39443984
next_work_item->desc, next_work_item->tag);
3945-
3985+
/* Drop it from ready_list, and update its dependencies */
39463986
par_list_remove(next_work_item);
39473987
reduce_dependencies(AH, next_work_item, &ready_list);
3948-
3988+
/* Loop around to see if anything else can be dispatched */
39493989
continue;
39503990
}
39513991

39523992
ahlog(AH, 1, "launching item %d %s %s\n",
39533993
next_work_item->dumpId,
39543994
next_work_item->desc, next_work_item->tag);
39553995

3996+
/* Remove it from ready_list, and dispatch to some worker */
39563997
par_list_remove(next_work_item);
39573998

39583999
DispatchJobForTocEntry(AH, pstate, next_work_item, ACT_RESTORE,
39594000
mark_restore_job_done, &ready_list);
39604001
}
4002+
else if (IsEveryWorkerIdle(pstate))
4003+
{
4004+
/*
4005+
* Nothing is ready and no worker is running, so we're done with
4006+
* the current pass or maybe with the whole process.
4007+
*/
4008+
if (AH->restorePass == RESTORE_PASS_LAST)
4009+
break; /* No more parallel processing is possible */
4010+
4011+
/* Advance to next restore pass */
4012+
AH->restorePass++;
4013+
/* That probably allows some stuff to be made ready */
4014+
move_to_ready_list(pending_list, &ready_list, AH->restorePass);
4015+
/* Loop around to see if anything's now ready */
4016+
continue;
4017+
}
39614018
else
39624019
{
3963-
/* at least one child is working and we have nothing ready. */
4020+
/*
4021+
* We have nothing ready, but at least one child is working, so
4022+
* wait for some subjob to finish.
4023+
*/
39644024
}
39654025

39664026
/*
@@ -3980,9 +4040,21 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
39804040
next_work_item ? WFW_ONE_IDLE : WFW_GOT_STATUS);
39814041
}
39824042

4043+
/* There should now be nothing in ready_list. */
4044+
Assert(ready_list.par_next == &ready_list);
4045+
39834046
ahlog(AH, 1, "finished main parallel loop\n");
39844047
}
39854048

4049+
/*
4050+
* Main engine for parallel restore.
4051+
*
4052+
* Parallel restore is done in three phases. In this third phase,
4053+
* we mop up any remaining TOC entries by processing them serially.
4054+
* This phase normally should have nothing to do, but if we've somehow
4055+
* gotten stuck due to circular dependencies or some such, this provides
4056+
* at least some chance of completing the restore successfully.
4057+
*/
39864058
static void
39874059
restore_toc_entries_postfork(ArchiveHandle *AH, TocEntry *pending_list)
39884060
{
@@ -4002,18 +4074,17 @@ restore_toc_entries_postfork(ArchiveHandle *AH, TocEntry *pending_list)
40024074
_doSetFixedOutputState(AH);
40034075

40044076
/*
4005-
* Make sure there is no non-ACL work left due to, say, circular
4006-
* dependencies, or some other pathological condition. If so, do it in the
4007-
* single parent connection.
4077+
* Make sure there is no work left due to, say, circular dependencies, or
4078+
* some other pathological condition. If so, do it in the single parent
4079+
* connection. We don't sweat about RestorePass ordering; it's likely we
4080+
* already violated that.
40084081
*/
40094082
for (te = pending_list->par_next; te != pending_list; te = te->par_next)
40104083
{
40114084
ahlog(AH, 1, "processing missed item %d %s %s\n",
40124085
te->dumpId, te->desc, te->tag);
40134086
(void) restore_toc_entry(AH, te, false);
40144087
}
4015-
4016-
/* The ACLs will be handled back in RestoreArchive. */
40174088
}
40184089

40194090
/*
@@ -4072,6 +4143,36 @@ par_list_remove(TocEntry *te)
40724143
}
40734144

40744145

4146+
/*
4147+
* Move all immediately-ready items from pending_list to ready_list.
4148+
*
4149+
* Items are considered ready if they have no remaining dependencies and
4150+
* they belong in the current restore pass. (See also reduce_dependencies,
4151+
* which applies the same logic one-at-a-time.)
4152+
*/
4153+
static void
4154+
move_to_ready_list(TocEntry *pending_list, TocEntry *ready_list,
4155+
RestorePass pass)
4156+
{
4157+
TocEntry *te;
4158+
TocEntry *next_te;
4159+
4160+
for (te = pending_list->par_next; te != pending_list; te = next_te)
4161+
{
4162+
/* must save list link before possibly moving te to other list */
4163+
next_te = te->par_next;
4164+
4165+
if (te->depCount == 0 &&
4166+
_tocEntryRestorePass(te) == pass)
4167+
{
4168+
/* Remove it from pending_list ... */
4169+
par_list_remove(te);
4170+
/* ... and add to ready_list */
4171+
par_list_append(ready_list, te);
4172+
}
4173+
}
4174+
}
4175+
40754176
/*
40764177
* Find the next work item (if any) that is capable of being run now.
40774178
*
@@ -4457,8 +4558,17 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te, TocEntry *ready_list)
44574558
{
44584559
TocEntry *otherte = AH->tocsByDumpId[te->revDeps[i]];
44594560

4561+
Assert(otherte->depCount > 0);
44604562
otherte->depCount--;
4461-
if (otherte->depCount == 0 && otherte->par_prev != NULL)
4563+
4564+
/*
4565+
* It's ready if it has no remaining dependencies and it belongs in
4566+
* the current restore pass. However, don't move it if it has not yet
4567+
* been put into the pending list.
4568+
*/
4569+
if (otherte->depCount == 0 &&
4570+
_tocEntryRestorePass(otherte) == AH->restorePass &&
4571+
otherte->par_prev != NULL)
44624572
{
44634573
/* It must be in the pending list, so remove it ... */
44644574
par_list_remove(otherte);

‎src/bin/pg_dump/pg_backup_archiver.h

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,30 @@ typedef enum
203203
OUTPUT_OTHERDATA /* writing data as INSERT commands */
204204
} ArchiverOutput;
205205

206+
/*
207+
* For historical reasons, ACL items are interspersed with everything else in
208+
* a dump file's TOC; typically they're right after the object they're for.
209+
* However, we need to restore data before ACLs, as otherwise a read-only
210+
* table (ie one where the owner has revoked her own INSERT privilege) causes
211+
* data restore failures. On the other hand, matview REFRESH commands should
212+
* come out after ACLs, as otherwise non-superuser-owned matviews might not
213+
* be able to execute. (If the permissions at the time of dumping would not
214+
* allow a REFRESH, too bad; we won't fix that for you.) These considerations
215+
* force us to make three passes over the TOC, restoring the appropriate
216+
* subset of items in each pass. We assume that the dependency sort resulted
217+
* in an appropriate ordering of items within each subset.
218+
* XXX This mechanism should be superseded by tracking dependencies on ACLs
219+
* properly; but we'll still need it for old dump files even after that.
220+
*/
221+
typedef enum
222+
{
223+
RESTORE_PASS_MAIN = 0, /* Main pass (most TOC item types) */
224+
RESTORE_PASS_ACL, /* ACL item types */
225+
RESTORE_PASS_REFRESH /* Matview REFRESH items */
226+
227+
#define RESTORE_PASS_LAST RESTORE_PASS_REFRESH
228+
} RestorePass;
229+
206230
typedef enum
207231
{
208232
REQ_SCHEMA = 0x01, /* want schema */
@@ -329,6 +353,7 @@ struct _archiveHandle
329353
int noTocComments;
330354
ArchiverStage stage;
331355
ArchiverStage lastErrorStage;
356+
RestorePass restorePass; /* used only during parallel restore */
332357
struct _tocEntry *currentTE;
333358
struct _tocEntry *lastErrorTE;
334359
};

0 commit comments

Comments
 (0)
Please sign in to comment.