*** pgsql/src/backend/access/hash/hashpage.c	2009/06/11 14:48:53	1.80
--- pgsql/src/backend/access/hash/hashpage.c	2009/11/01 21:25:32	1.80.2.1
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.79 2009/01/01 17:23:35 momjian Exp $
   *
   * NOTES
   *	  Postgres hash pages look like ordinary relation pages.  The opaque
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.80 2009/06/11 14:48:53 momjian Exp $
   *
   * NOTES
   *	  Postgres hash pages look like ordinary relation pages.  The opaque
*************** _hash_splitbucket(Relation rel,
*** 765,784 ****
  			   uint32 highmask,
  			   uint32 lowmask)
  {
- 	Bucket		bucket;
- 	Buffer		obuf;
- 	Buffer		nbuf;
  	BlockNumber oblkno;
  	BlockNumber nblkno;
! 	HashPageOpaque oopaque;
! 	HashPageOpaque nopaque;
! 	IndexTuple	itup;
! 	Size		itemsz;
! 	OffsetNumber ooffnum;
! 	OffsetNumber noffnum;
! 	OffsetNumber omaxoffnum;
  	Page		opage;
  	Page		npage;
  
  	/*
  	 * It should be okay to simultaneously write-lock pages from each bucket,
--- 765,778 ----
  			   uint32 highmask,
  			   uint32 lowmask)
  {
  	BlockNumber oblkno;
  	BlockNumber nblkno;
! 	Buffer		obuf;
! 	Buffer		nbuf;
  	Page		opage;
  	Page		npage;
+ 	HashPageOpaque oopaque;
+ 	HashPageOpaque nopaque;
  
  	/*
  	 * It should be okay to simultaneously write-lock pages from each bucket,
*************** _hash_splitbucket(Relation rel,
*** 805,899 ****
  	/*
  	 * Partition the tuples in the old bucket between the old bucket and the
  	 * new bucket, advancing along the old bucket's overflow bucket chain and
! 	 * adding overflow pages to the new bucket as needed.
  	 */
- 	ooffnum = FirstOffsetNumber;
- 	omaxoffnum = PageGetMaxOffsetNumber(opage);
  	for (;;)
  	{
! 		/*
! 		 * at each iteration through this loop, each of these variables should
! 		 * be up-to-date: obuf opage oopaque ooffnum omaxoffnum
! 		 */
! 
! 		/* check if we're at the end of the page */
! 		if (ooffnum > omaxoffnum)
  		{
! 			/* at end of page, but check for an(other) overflow page */
! 			oblkno = oopaque->hasho_nextblkno;
! 			if (!BlockNumberIsValid(oblkno))
! 				break;
  
  			/*
! 			 * we ran out of tuples on this particular page, but we have more
! 			 * overflow pages; advance to next page.
  			 */
! 			_hash_wrtbuf(rel, obuf);
! 
! 			obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
! 			opage = BufferGetPage(obuf);
! 			oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
! 			ooffnum = FirstOffsetNumber;
! 			omaxoffnum = PageGetMaxOffsetNumber(opage);
! 			continue;
  		}
  
  		/*
! 		 * Fetch the item's hash key (conveniently stored in the item) and
! 		 * determine which bucket it now belongs in.
  		 */
! 		itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
! 		bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
! 									  maxbucket, highmask, lowmask);
! 
! 		if (bucket == nbucket)
  		{
! 			/*
! 			 * insert the tuple into the new bucket.  if it doesn't fit on the
! 			 * current page in the new bucket, we must allocate a new overflow
! 			 * page and place the tuple on that page instead.
! 			 */
! 			itemsz = IndexTupleDSize(*itup);
! 			itemsz = MAXALIGN(itemsz);
! 
! 			if (PageGetFreeSpace(npage) < itemsz)
! 			{
! 				/* write out nbuf and drop lock, but keep pin */
! 				_hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
! 				/* chain to a new overflow page */
! 				nbuf = _hash_addovflpage(rel, metabuf, nbuf);
! 				npage = BufferGetPage(nbuf);
! 				/* we don't need nopaque within the loop */
! 			}
! 
! 			noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
! 			if (PageAddItem(npage, (Item) itup, itemsz, noffnum, false, false)
! 				== InvalidOffsetNumber)
! 				elog(ERROR, "failed to add index item to \"%s\"",
! 					 RelationGetRelationName(rel));
! 
! 			/*
! 			 * now delete the tuple from the old bucket.  after this section
! 			 * of code, 'ooffnum' will actually point to the ItemId to which
! 			 * we would point if we had advanced it before the deletion
! 			 * (PageIndexTupleDelete repacks the ItemId array).  this also
! 			 * means that 'omaxoffnum' is exactly one less than it used to be,
! 			 * so we really can just decrement it instead of calling
! 			 * PageGetMaxOffsetNumber.
! 			 */
! 			PageIndexTupleDelete(opage, ooffnum);
! 			omaxoffnum = OffsetNumberPrev(omaxoffnum);
  		}
  		else
! 		{
! 			/*
! 			 * the tuple stays on this page.  we didn't move anything, so we
! 			 * didn't delete anything and therefore we don't have to change
! 			 * 'omaxoffnum'.
! 			 */
! 			Assert(bucket == obucket);
! 			ooffnum = OffsetNumberNext(ooffnum);
! 		}
  	}
  
  	/*
--- 799,898 ----
  	/*
  	 * Partition the tuples in the old bucket between the old bucket and the
  	 * new bucket, advancing along the old bucket's overflow bucket chain and
! 	 * adding overflow pages to the new bucket as needed.  Outer loop
! 	 * iterates once per page in old bucket.
  	 */
  	for (;;)
  	{
! 		OffsetNumber ooffnum;
! 		OffsetNumber omaxoffnum;
! 		OffsetNumber deletable[MaxOffsetNumber];
! 		int			ndeletable = 0;
! 
! 		/* Scan each tuple in old page */
! 		omaxoffnum = PageGetMaxOffsetNumber(opage);
! 		for (ooffnum = FirstOffsetNumber;
! 			 ooffnum <= omaxoffnum;
! 			 ooffnum = OffsetNumberNext(ooffnum))
  		{
! 			IndexTuple	itup;
! 			Size		itemsz;
! 			Bucket		bucket;
  
  			/*
! 			 * Fetch the item's hash key (conveniently stored in the item) and
! 			 * determine which bucket it now belongs in.
  			 */
! 			itup = (IndexTuple) PageGetItem(opage,
! 											PageGetItemId(opage, ooffnum));
! 			bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
! 										  maxbucket, highmask, lowmask);
! 
! 			if (bucket == nbucket)
! 			{
! 				/*
! 				 * insert the tuple into the new bucket.  if it doesn't fit on
! 				 * the current page in the new bucket, we must allocate a new
! 				 * overflow page and place the tuple on that page instead.
! 				 */
! 				itemsz = IndexTupleDSize(*itup);
! 				itemsz = MAXALIGN(itemsz);
! 
! 				if (PageGetFreeSpace(npage) < itemsz)
! 				{
! 					/* write out nbuf and drop lock, but keep pin */
! 					_hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
! 					/* chain to a new overflow page */
! 					nbuf = _hash_addovflpage(rel, metabuf, nbuf);
! 					npage = BufferGetPage(nbuf);
! 					/* we don't need nblkno or nopaque within the loop */
! 				}
! 
! 				/*
! 				 * Insert tuple on new page, using _hash_pgaddtup to ensure
! 				 * correct ordering by hashkey.  This is a tad inefficient
! 				 * since we may have to shuffle itempointers repeatedly.
! 				 * Possible future improvement: accumulate all the items for
! 				 * the new page and qsort them before insertion.
! 				 */
! 				(void) _hash_pgaddtup(rel, nbuf, itemsz, itup);
! 
! 				/*
! 				 * Mark tuple for deletion from old page.
! 				 */
! 				deletable[ndeletable++] = ooffnum;
! 			}
! 			else
! 			{
! 				/*
! 				 * the tuple stays on this page, so nothing to do.
! 				 */
! 				Assert(bucket == obucket);
! 			}
  		}
  
+ 		oblkno = oopaque->hasho_nextblkno;
+ 
  		/*
! 		 * Done scanning this old page.  If we moved any tuples, delete them
! 		 * from the old page.
  		 */
! 		if (ndeletable > 0)
  		{
! 			PageIndexMultiDelete(opage, deletable, ndeletable);
! 			_hash_wrtbuf(rel, obuf);
  		}
  		else
! 			_hash_relbuf(rel, obuf);
! 
! 		/* Exit loop if no more overflow pages in old bucket */
! 		if (!BlockNumberIsValid(oblkno))
! 			break;
! 
! 		/* Else, advance to next old page */
! 		obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
! 		opage = BufferGetPage(obuf);
! 		oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
  	}
  
  	/*
*************** _hash_splitbucket(Relation rel,
*** 902,908 ****
  	 * tuples remaining in the old bucket (including the overflow pages) are
  	 * packed as tightly as possible.  The new bucket is already tight.
  	 */
- 	_hash_wrtbuf(rel, obuf);
  	_hash_wrtbuf(rel, nbuf);
  
  	_hash_squeezebucket(rel, obucket, start_oblkno, NULL);
--- 901,906 ----
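
The substance of the patch above is the switch from deleting each moved tuple
on the spot with PageIndexTupleDelete(), which repacks the ItemId array on
every call and forced the delicate ooffnum/omaxoffnum bookkeeping the old
comments apologize for, to marking moved tuples in a local deletable[] array
and removing them with a single PageIndexMultiDelete() per page.  The
stand-alone C sketch below illustrates that collect-then-batch-delete pattern
on a toy page modeled as a plain array of hash keys; key_to_bucket() and every
other name in it are illustrative stand-ins (a much-simplified
_hash_hashkey2bucket()), not actual PostgreSQL APIs.

/*
 * Sketch of the collect-then-batch-delete pattern: scan without modifying,
 * remember what to delete, compact once at the end.
 */
#include <stdio.h>

#define MAX_ITEMS 16

/* toy bucket assignment: the low bits of the hash key pick the bucket */
static unsigned
key_to_bucket(unsigned hashkey, unsigned lowmask)
{
	return hashkey & lowmask;
}

int
main(void)
{
	unsigned	opage[MAX_ITEMS] = {2, 5, 8, 11, 12, 15};	/* "old page" */
	int			nitems = 6;
	unsigned	nbucket = 1;	/* keys hashing to 1 move to the new bucket */
	int			deletable[MAX_ITEMS];
	int			ndeletable = 0;
	int			src, dst, d;

	/*
	 * Pass 1: scan the page without modifying it, so item positions stay
	 * stable.  Tuples bound for the new bucket are only *marked*.
	 */
	for (src = 0; src < nitems; src++)
	{
		if (key_to_bucket(opage[src], 1) == nbucket)
		{
			printf("move %u to new bucket\n", opage[src]);
			deletable[ndeletable++] = src;
		}
	}

	/*
	 * Pass 2: one compaction over the page, the moral equivalent of
	 * PageIndexMultiDelete().  deletable[] is in ascending order because
	 * pass 1 scanned in ascending order.
	 */
	if (ndeletable > 0)
	{
		for (src = 0, dst = 0, d = 0; src < nitems; src++)
		{
			if (d < ndeletable && deletable[d] == src)
				d++;			/* skip an item marked for deletion */
			else
				opage[dst++] = opage[src];
		}
		nitems = dst;
	}

	for (src = 0; src < nitems; src++)
		printf("stays in old bucket: %u\n", opage[src]);
	return 0;
}

Because the scan pass never modifies the page it is iterating over, a plain
ascending loop over offsets suffices, and the patch touches the old page's
buffer just once per page: _hash_wrtbuf() if anything was deleted,
_hash_relbuf() otherwise, instead of dirtying it for every moved tuple.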