*** pgsql/src/backend/access/hash/hash.c 2009/06/11 14:48:53 1.112 --- pgsql/src/backend/access/hash/hash.c 2009/11/01 21:25:32 1.112.2.1 *************** *** 8,14 **** * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.111 2009/06/06 22:13:50 tgl Exp $ * * NOTES * This file contains only the public interface routines. --- 8,14 ---- * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.112 2009/06/11 14:48:53 momjian Exp $ * * NOTES * This file contains only the public interface routines. *************** hashgettuple(PG_FUNCTION_ARGS) *** 206,213 **** --- 206,215 ---- ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); HashScanOpaque so = (HashScanOpaque) scan->opaque; Relation rel = scan->indexRelation; + Buffer buf; Page page; OffsetNumber offnum; + ItemPointer current; bool res; /* Hash indexes are always lossy since we store only the hash code */ *************** hashgettuple(PG_FUNCTION_ARGS) *** 225,233 **** * appropriate direction. If we haven't done so yet, we call a routine to * get the first item in the scan. */ ! if (ItemPointerIsValid(&(so->hashso_curpos))) { /* * Check to see if we should kill the previously-fetched tuple. */ if (scan->kill_prior_tuple) --- 227,265 ---- * appropriate direction. If we haven't done so yet, we call a routine to * get the first item in the scan. */ ! current = &(so->hashso_curpos); ! if (ItemPointerIsValid(current)) { /* + * An insertion into the current index page could have happened while + * we didn't have read lock on it. Re-find our position by looking + * for the TID we previously returned. (Because we hold share lock on + * the bucket, no deletions or splits could have occurred; therefore + * we can expect that the TID still exists in the current index page, + * at an offset >= where we were.) + */ + OffsetNumber maxoffnum; + + buf = so->hashso_curbuf; + Assert(BufferIsValid(buf)); + page = BufferGetPage(buf); + maxoffnum = PageGetMaxOffsetNumber(page); + for (offnum = ItemPointerGetOffsetNumber(current); + offnum <= maxoffnum; + offnum = OffsetNumberNext(offnum)) + { + IndexTuple itup; + + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + if (ItemPointerEquals(&scan->xs_ctup.t_self, &itup->t_tid)) + break; + } + if (offnum > maxoffnum) + elog(ERROR, "failed to re-find scan position within index \"%s\"", + RelationGetRelationName(rel)); + ItemPointerSetOffsetNumber(current, offnum); + + /* * Check to see if we should kill the previously-fetched tuple. */ if (scan->kill_prior_tuple) *************** hashgettuple(PG_FUNCTION_ARGS) *** 235,242 **** /* * Yes, so mark it by setting the LP_DEAD state in the item flags. */ - offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos)); - page = BufferGetPage(so->hashso_curbuf); ItemIdMarkDead(PageGetItemId(page, offnum)); /* --- 267,272 ---- *************** hashgettuple(PG_FUNCTION_ARGS) *** 244,250 **** * as a commit-hint-bit status update for heap tuples: we mark the * buffer dirty but don't make a WAL log entry. */ ! SetBufferCommitInfoNeedsSave(so->hashso_curbuf); } /* --- 274,280 ---- * as a commit-hint-bit status update for heap tuples: we mark the * buffer dirty but don't make a WAL log entry. */ ! SetBufferCommitInfoNeedsSave(buf); } /* *************** hashgettuple(PG_FUNCTION_ARGS) *** 262,268 **** { while (res) { ! offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos)); page = BufferGetPage(so->hashso_curbuf); if (!ItemIdIsDead(PageGetItemId(page, offnum))) break; --- 292,298 ---- { while (res) { ! offnum = ItemPointerGetOffsetNumber(current); page = BufferGetPage(so->hashso_curbuf); if (!ItemIdIsDead(PageGetItemId(page, offnum))) break; *************** loop_top: *** 517,523 **** HashPageOpaque opaque; OffsetNumber offno; OffsetNumber maxoffno; ! bool page_dirty = false; vacuum_delay_point(); --- 547,554 ---- HashPageOpaque opaque; OffsetNumber offno; OffsetNumber maxoffno; ! OffsetNumber deletable[MaxOffsetNumber]; ! int ndeletable = 0; vacuum_delay_point(); *************** loop_top: *** 529,537 **** Assert(opaque->hasho_bucket == cur_bucket); /* Scan each tuple in page */ - offno = FirstOffsetNumber; maxoffno = PageGetMaxOffsetNumber(page); ! while (offno <= maxoffno) { IndexTuple itup; ItemPointer htup; --- 560,569 ---- Assert(opaque->hasho_bucket == cur_bucket); /* Scan each tuple in page */ maxoffno = PageGetMaxOffsetNumber(page); ! for (offno = FirstOffsetNumber; ! offno <= maxoffno; ! offno = OffsetNumberNext(offno)) { IndexTuple itup; ItemPointer htup; *************** loop_top: *** 541,570 **** htup = &(itup->t_tid); if (callback(htup, callback_state)) { ! /* delete the item from the page */ ! PageIndexTupleDelete(page, offno); ! bucket_dirty = page_dirty = true; ! ! /* don't increment offno, instead decrement maxoffno */ ! maxoffno = OffsetNumberPrev(maxoffno); ! tuples_removed += 1; } else - { - offno = OffsetNumberNext(offno); - num_index_tuples += 1; - } } /* ! * Write page if needed, advance to next page. */ blkno = opaque->hasho_nextblkno; ! if (page_dirty) _hash_wrtbuf(rel, buf); else _hash_relbuf(rel, buf); } --- 573,597 ---- htup = &(itup->t_tid); if (callback(htup, callback_state)) { ! /* mark the item for deletion */ ! deletable[ndeletable++] = offno; tuples_removed += 1; } else num_index_tuples += 1; } /* ! * Apply deletions and write page if needed, advance to next page. */ blkno = opaque->hasho_nextblkno; ! if (ndeletable > 0) ! { ! PageIndexMultiDelete(page, deletable, ndeletable); _hash_wrtbuf(rel, buf); + bucket_dirty = true; + } else _hash_relbuf(rel, buf); }