Skip to content

Commit 9155580

Browse files
committed
Generate less WAL during GiST, GIN and SP-GiST index build.
Instead of WAL-logging every modification during the build separately, first build the index without any WAL-logging, and make a separate pass through the index at the end, to write all pages to the WAL. This significantly reduces the amount of WAL generated, and is usually also faster, despite the extra I/O needed for the extra scan through the index. WAL generated this way is also faster to replay.

For GiST, the LSN-NSN interlock makes this a little tricky. All pages must be marked with a valid (i.e. non-zero) LSN, so that the parent-child LSN-NSN interlock works correctly. We now use the magic value 1 for that during index build. Change the fake LSN counter to begin from 1000, so that 1 is safely smaller than any real or fake LSN. 2 would've been enough for our purposes, but let's reserve a bigger range, in case we need more special values in the future.

Author: Anastasia Lubennikova, Andrey V. Lepikhov
Reviewed-by: Heikki Linnakangas, Dmitry Dolgov
1 parent 5f76804 commit 9155580

27 files changed

+223
-222
lines changed

src/backend/access/gin/ginbtree.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
396396
/* It will fit, perform the insertion */
397397
START_CRIT_SECTION();
398398

399-
if (RelationNeedsWAL(btree->index))
399+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
400400
{
401401
XLogBeginInsert();
402402
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
@@ -417,7 +417,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
417417
MarkBufferDirty(childbuf);
418418
}
419419

420-
if (RelationNeedsWAL(btree->index))
420+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
421421
{
422422
XLogRecPtr recptr;
423423
ginxlogInsert xlrec;
@@ -595,7 +595,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
595595
}
596596

597597
/* write WAL record */
598-
if (RelationNeedsWAL(btree->index))
598+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
599599
{
600600
XLogRecPtr recptr;
601601

src/backend/access/gin/gindatapage.c

+5-4
Original file line numberDiff line numberDiff line change
@@ -593,7 +593,7 @@ dataBeginPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
593593
* Great, all the items fit on a single page. If needed, prepare data
594594
* for a WAL record describing the changes we'll make.
595595
*/
596-
if (RelationNeedsWAL(btree->index))
596+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
597597
computeLeafRecompressWALData(leaf);
598598

599599
/*
@@ -719,7 +719,7 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
719719
dataPlaceToPageLeafRecompress(buf, leaf);
720720

721721
/* If needed, register WAL data built by computeLeafRecompressWALData */
722-
if (RelationNeedsWAL(btree->index))
722+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
723723
{
724724
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
725725
}
@@ -1152,7 +1152,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
11521152
pitem = (PostingItem *) insertdata;
11531153
GinDataPageAddPostingItem(page, pitem, off);
11541154

1155-
if (RelationNeedsWAL(btree->index))
1155+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
11561156
{
11571157
/*
11581158
* This must be static, because it has to survive until XLogInsert,
@@ -1773,6 +1773,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
17731773
Pointer ptr;
17741774
int nrootitems;
17751775
int rootsize;
1776+
bool is_build = (buildStats != NULL);
17761777

17771778
/* Construct the new root page in memory first. */
17781779
tmppage = (Page) palloc(BLCKSZ);
@@ -1826,7 +1827,7 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
18261827
PageRestoreTempPage(tmppage, page);
18271828
MarkBufferDirty(buffer);
18281829

1829-
if (RelationNeedsWAL(index))
1830+
if (RelationNeedsWAL(index) && !is_build)
18301831
{
18311832
XLogRecPtr recptr;
18321833
ginxlogCreatePostingTree data;

src/backend/access/gin/ginentrypage.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
571571
elog(ERROR, "failed to add item to index page in \"%s\"",
572572
RelationGetRelationName(btree->index));
573573

574-
if (RelationNeedsWAL(btree->index))
574+
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
575575
{
576576
/*
577577
* This must be static, because it has to survive until XLogInsert,

src/backend/access/gin/gininsert.c

+13-18
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ ginEntryInsert(GinState *ginstate,
195195
buildStats->nEntries++;
196196

197197
ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
198+
btree.isBuild = (buildStats != NULL);
198199

199200
stack = ginFindLeafPage(&btree, false, false, NULL);
200201
page = BufferGetPage(stack->buffer);
@@ -347,23 +348,6 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
347348
GinInitBuffer(RootBuffer, GIN_LEAF);
348349
MarkBufferDirty(RootBuffer);
349350

350-
if (RelationNeedsWAL(index))
351-
{
352-
XLogRecPtr recptr;
353-
Page page;
354-
355-
XLogBeginInsert();
356-
XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
357-
XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
358-
359-
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
360-
361-
page = BufferGetPage(RootBuffer);
362-
PageSetLSN(page, recptr);
363-
364-
page = BufferGetPage(MetaBuffer);
365-
PageSetLSN(page, recptr);
366-
}
367351

368352
UnlockReleaseBuffer(MetaBuffer);
369353
UnlockReleaseBuffer(RootBuffer);
@@ -419,7 +403,18 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo)
419403
* Update metapage stats
420404
*/
421405
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
422-
ginUpdateStats(index, &buildstate.buildStats);
406+
ginUpdateStats(index, &buildstate.buildStats, true);
407+
408+
/*
409+
* We didn't write WAL records as we built the index, so if WAL-logging is
410+
* required, write all pages to the WAL now.
411+
*/
412+
if (RelationNeedsWAL(index))
413+
{
414+
log_newpage_range(index, MAIN_FORKNUM,
415+
0, RelationGetNumberOfBlocks(index),
416+
true);
417+
}
423418

424419
/*
425420
* Return statistics

src/backend/access/gin/ginutil.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ ginGetStats(Relation index, GinStatsData *stats)
662662
* Note: nPendingPages and ginVersion are *not* copied over
663663
*/
664664
void
665-
ginUpdateStats(Relation index, const GinStatsData *stats)
665+
ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
666666
{
667667
Buffer metabuffer;
668668
Page metapage;
@@ -692,7 +692,7 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
692692

693693
MarkBufferDirty(metabuffer);
694694

695-
if (RelationNeedsWAL(index))
695+
if (RelationNeedsWAL(index) && !is_build)
696696
{
697697
XLogRecPtr recptr;
698698
ginxlogUpdateMeta data;

src/backend/access/gin/ginvacuum.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
759759

760760
/* Update the metapage with accurate page and entry counts */
761761
idxStat.nTotalPages = npages;
762-
ginUpdateStats(info->index, &idxStat);
762+
ginUpdateStats(info->index, &idxStat, false);
763763

764764
/* Finally, vacuum the FSM */
765765
IndexFreeSpaceMapVacuum(info->index);

src/backend/access/gin/ginxlog.c

-33
Original file line numberDiff line numberDiff line change
@@ -40,36 +40,6 @@ ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
4040
UnlockReleaseBuffer(buffer);
4141
}
4242

43-
static void
44-
ginRedoCreateIndex(XLogReaderState *record)
45-
{
46-
XLogRecPtr lsn = record->EndRecPtr;
47-
Buffer RootBuffer,
48-
MetaBuffer;
49-
Page page;
50-
51-
MetaBuffer = XLogInitBufferForRedo(record, 0);
52-
Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
53-
page = (Page) BufferGetPage(MetaBuffer);
54-
55-
GinInitMetabuffer(MetaBuffer);
56-
57-
PageSetLSN(page, lsn);
58-
MarkBufferDirty(MetaBuffer);
59-
60-
RootBuffer = XLogInitBufferForRedo(record, 1);
61-
Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
62-
page = (Page) BufferGetPage(RootBuffer);
63-
64-
GinInitBuffer(RootBuffer, GIN_LEAF);
65-
66-
PageSetLSN(page, lsn);
67-
MarkBufferDirty(RootBuffer);
68-
69-
UnlockReleaseBuffer(RootBuffer);
70-
UnlockReleaseBuffer(MetaBuffer);
71-
}
72-
7343
static void
7444
ginRedoCreatePTree(XLogReaderState *record)
7545
{
@@ -767,9 +737,6 @@ gin_redo(XLogReaderState *record)
767737
oldCtx = MemoryContextSwitchTo(opCtx);
768738
switch (info)
769739
{
770-
case XLOG_GIN_CREATE_INDEX:
771-
ginRedoCreateIndex(record);
772-
break;
773740
case XLOG_GIN_CREATE_PTREE:
774741
ginRedoCreatePTree(record);
775742
break;

src/backend/access/gist/gist.c

+46-30
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ gistinsert(Relation r, Datum *values, bool *isnull,
173173
values, isnull, true /* size is currently bogus */ );
174174
itup->t_tid = *ht_ctid;
175175

176-
gistdoinsert(r, itup, 0, giststate, heapRel);
176+
gistdoinsert(r, itup, 0, giststate, heapRel, false);
177177

178178
/* cleanup */
179179
MemoryContextSwitchTo(oldCxt);
@@ -220,7 +220,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
220220
Buffer leftchildbuf,
221221
List **splitinfo,
222222
bool markfollowright,
223-
Relation heapRel)
223+
Relation heapRel,
224+
bool is_build)
224225
{
225226
BlockNumber blkno = BufferGetBlockNumber(buffer);
226227
Page page = BufferGetPage(buffer);
@@ -459,7 +460,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
459460
* insertion for that. NB: The number of pages and data segments
460461
* specified here must match the calculations in gistXLogSplit()!
461462
*/
462-
if (RelationNeedsWAL(rel))
463+
if (!is_build && RelationNeedsWAL(rel))
463464
XLogEnsureRecordSpace(npage, 1 + npage * 2);
464465

465466
START_CRIT_SECTION();
@@ -480,18 +481,30 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
480481
PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
481482
dist->page = BufferGetPage(dist->buffer);
482483

483-
/* Write the WAL record */
484-
if (RelationNeedsWAL(rel))
485-
recptr = gistXLogSplit(is_leaf,
486-
dist, oldrlink, oldnsn, leftchildbuf,
487-
markfollowright);
484+
/*
485+
* Write the WAL record.
486+
*
487+
* If we're building a new index, however, we don't WAL-log changes
488+
* yet. The LSN-NSN interlock between parent and child requires that
489+
* LSNs never move backwards, so set the LSNs to a value that's
490+
* smaller than any real or fake unlogged LSN that might be generated
491+
* later. (There can't be any concurrent scans during index build, so
492+
* we don't need to be able to detect concurrent splits yet.)
493+
*/
494+
if (is_build)
495+
recptr = GistBuildLSN;
488496
else
489-
recptr = gistGetFakeLSN(rel);
497+
{
498+
if (RelationNeedsWAL(rel))
499+
recptr = gistXLogSplit(is_leaf,
500+
dist, oldrlink, oldnsn, leftchildbuf,
501+
markfollowright);
502+
else
503+
recptr = gistGetFakeLSN(rel);
504+
}
490505

491506
for (ptr = dist; ptr; ptr = ptr->next)
492-
{
493507
PageSetLSN(ptr->page, recptr);
494-
}
495508

496509
/*
497510
* Return the new child buffers to the caller.
@@ -545,28 +558,29 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
545558
if (BufferIsValid(leftchildbuf))
546559
MarkBufferDirty(leftchildbuf);
547560

548-
if (RelationNeedsWAL(rel))
561+
if (is_build)
562+
recptr = GistBuildLSN;
563+
else
549564
{
550-
OffsetNumber ndeloffs = 0,
551-
deloffs[1];
552-
553-
if (OffsetNumberIsValid(oldoffnum))
565+
if (RelationNeedsWAL(rel))
554566
{
555-
deloffs[0] = oldoffnum;
556-
ndeloffs = 1;
557-
}
567+
OffsetNumber ndeloffs = 0,
568+
deloffs[1];
558569

559-
recptr = gistXLogUpdate(buffer,
560-
deloffs, ndeloffs, itup, ntup,
561-
leftchildbuf);
570+
if (OffsetNumberIsValid(oldoffnum))
571+
{
572+
deloffs[0] = oldoffnum;
573+
ndeloffs = 1;
574+
}
562575

563-
PageSetLSN(page, recptr);
564-
}
565-
else
566-
{
567-
recptr = gistGetFakeLSN(rel);
568-
PageSetLSN(page, recptr);
576+
recptr = gistXLogUpdate(buffer,
577+
deloffs, ndeloffs, itup, ntup,
578+
leftchildbuf);
579+
}
580+
else
581+
recptr = gistGetFakeLSN(rel);
569582
}
583+
PageSetLSN(page, recptr);
570584

571585
if (newblkno)
572586
*newblkno = blkno;
@@ -607,7 +621,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
607621
*/
608622
void
609623
gistdoinsert(Relation r, IndexTuple itup, Size freespace,
610-
GISTSTATE *giststate, Relation heapRel)
624+
GISTSTATE *giststate, Relation heapRel, bool is_build)
611625
{
612626
ItemId iid;
613627
IndexTuple idxtuple;
@@ -620,6 +634,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace,
620634
state.freespace = freespace;
621635
state.r = r;
622636
state.heapRel = heapRel;
637+
state.is_build = is_build;
623638

624639
/* Start from the root */
625640
firststack.blkno = GIST_ROOT_BLKNO;
@@ -1252,7 +1267,8 @@ gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
12521267
leftchild,
12531268
&splitinfo,
12541269
true,
1255-
state->heapRel);
1270+
state->heapRel,
1271+
state->is_build);
12561272

12571273
/*
12581274
* Before recursing up in case the page was split, release locks on the

0 commit comments

Comments
 (0)