*** pgsql/src/backend/access/heap/hio.c 2008/09/30 10:52:10 1.73 --- pgsql/src/backend/access/heap/hio.c 2008/11/06 20:51:14 1.74 *************** *** 8,20 **** * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.72 2008/07/13 20:45:47 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/hio.h" #include "storage/bufmgr.h" #include "storage/freespace.h" --- 8,21 ---- * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" + #include "access/heapam.h" #include "access/hio.h" #include "storage/bufmgr.h" #include "storage/freespace.h" *************** RelationPutHeapTuple(Relation relation, *** 57,62 **** --- 58,100 ---- } /* + * Read in a buffer, using bulk-insert strategy if bistate isn't NULL. + * + * With a NULL bistate this is a plain ReadBuffer() call. Otherwise, if + * bistate->current_buf already holds targetBlock, that buffer gets an + * additional reference via IncrBufferRefCount() and is returned; if it + * holds a different block, it is released with ReleaseBuffer() and the + * target is read through ReadBufferExtended() with bistate->strategy. + * The buffer read that way is stored in bistate->current_buf after its + * own IncrBufferRefCount() call, so the cached entry holds a reference + * separate from the one returned to the caller. + */ + static Buffer + ReadBufferBI(Relation relation, BlockNumber targetBlock, + BulkInsertState bistate) + { + Buffer buffer; + + /* If not bulk-insert, exactly like ReadBuffer */ + if (!bistate) + return ReadBuffer(relation, targetBlock); + + /* If we have the desired block already pinned, re-pin and return it */ + if (bistate->current_buf != InvalidBuffer) + { + if (BufferGetBlockNumber(bistate->current_buf) == targetBlock) + { + IncrBufferRefCount(bistate->current_buf); + return bistate->current_buf; + } + /* ... 
else drop the old buffer */ + ReleaseBuffer(bistate->current_buf); + bistate->current_buf = InvalidBuffer; + } + + /* Perform a read using the buffer strategy */ + buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock, + RBM_NORMAL, bistate->strategy); + + /* Save the selected block as target for future inserts; the reference taken here is the one kept in bistate, separate from the one returned */ + IncrBufferRefCount(buffer); + bistate->current_buf = buffer; + + return buffer; + } + + /* * RelationGetBufferForTuple * * Returns pinned and exclusive-locked buffer of a page in given relation *************** RelationPutHeapTuple(Relation relation, *** 80,92 **** * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * ! * If use_fsm is true (the normal case), we use FSM to help us find free ! * space. If use_fsm is false, we always append a new empty page to the ! * end of the relation if the tuple won't fit on the current target page. * This can save some cycles when we know the relation is new and doesn't * contain useful amounts of free space. * ! * The use_fsm = false case is also useful for non-WAL-logged additions to a * relation, if the caller holds exclusive lock and is careful to invalidate * relation->rd_targblock before the first insertion --- that ensures that * all insertions will occur into newly added pages and not be intermixed --- 118,130 ---- * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * ! * We normally use FSM to help us find free space. However, ! * if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to ! * the end of the relation if the tuple won't fit on the current target page. * This can save some cycles when we know the relation is new and doesn't * contain useful amounts of free space. * ! 
* HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a * relation, if the caller holds exclusive lock and is careful to invalidate * relation->rd_targblock before the first insertion --- that ensures that * all insertions will occur into newly added pages and not be intermixed *************** RelationPutHeapTuple(Relation relation, *** 94,99 **** --- 132,143 ---- * any committed data of other transactions. (See heap_insert's comments * for additional constraints needed for safe usage of this behavior.) * + * The caller can also provide a BulkInsertState object to optimize many + * insertions into the same relation. This keeps a pin on the current + * insertion target page (to save pin/unpin cycles) and also passes a + * BULKWRITE buffer selection strategy object to the buffer manager. + * Passing NULL for bistate selects the default behavior. + * * We always try to avoid filling existing pages further than the fillfactor. * This is OK since this routine is not consulted when updating a tuple and * keeping it on the same page, which is the scenario fillfactor is meant *************** RelationPutHeapTuple(Relation relation, *** 104,111 **** */ Buffer RelationGetBufferForTuple(Relation relation, Size len, ! Buffer otherBuffer, bool use_fsm) { Buffer buffer = InvalidBuffer; Page page; Size pageFreeSpace, --- 148,157 ---- */ Buffer RelationGetBufferForTuple(Relation relation, Size len, ! Buffer otherBuffer, int options, ! struct BulkInsertStateData *bistate) { + bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; Size pageFreeSpace, *************** RelationGetBufferForTuple(Relation relat *** 116,121 **** --- 162,170 ---- len = MAXALIGN(len); /* be conservative */ + /* Bulk insert is not supported for updates, only inserts. 
*/ + Assert(otherBuffer == InvalidBuffer || !bistate); + /* * If we're gonna fail for oversize tuple, do it right away */ *************** RelationGetBufferForTuple(Relation relat *** 137,161 **** /* * We first try to put the tuple on the same page we last inserted a tuple ! * on, as cached in the relcache entry. If that doesn't work, we ask the ! * shared Free Space Map to locate a suitable page. Since the FSM's info ! * might be out of date, we have to be prepared to loop around and retry ! * multiple times. (To insure this isn't an infinite loop, we must update ! * the FSM with the correct amount of free space on each page that proves ! * not to be suitable.) If the FSM has no record of a page with enough ! * free space, we give up and extend the relation. * * When use_fsm is false, we either put the tuple onto the existing target * page or extend the relation. */ ! if (len + saveFreeSpace <= MaxHeapTupleSize) ! targetBlock = relation->rd_targblock; ! else { ! /* can't fit, don't screw up FSM request tracking by trying */ targetBlock = InvalidBlockNumber; use_fsm = false; } if (targetBlock == InvalidBlockNumber && use_fsm) { --- 186,212 ---- /* * We first try to put the tuple on the same page we last inserted a tuple ! * on, as cached in the BulkInsertState or relcache entry. If that ! * doesn't work, we ask the Free Space Map to locate a suitable page. ! * Since the FSM's info might be out of date, we have to be prepared to ! * loop around and retry multiple times. (To insure this isn't an infinite ! * loop, we must update the FSM with the correct amount of free space on ! * each page that proves not to be suitable.) If the FSM has no record of ! * a page with enough free space, we give up and extend the relation. * * When use_fsm is false, we either put the tuple onto the existing target * page or extend the relation. */ ! if (len + saveFreeSpace > MaxHeapTupleSize) { ! 
/* can't fit, don't bother asking FSM */ targetBlock = InvalidBlockNumber; use_fsm = false; } + else if (bistate && bistate->current_buf != InvalidBuffer) + targetBlock = BufferGetBlockNumber(bistate->current_buf); + else + targetBlock = relation->rd_targblock; if (targetBlock == InvalidBlockNumber && use_fsm) { *************** RelationGetBufferForTuple(Relation relat *** 189,195 **** if (otherBuffer == InvalidBuffer) { /* easy case */ ! buffer = ReadBuffer(relation, targetBlock); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock == targetBlock) --- 240,246 ---- if (otherBuffer == InvalidBuffer) { /* easy case */ ! buffer = ReadBufferBI(relation, targetBlock, bistate); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock == targetBlock) *************** RelationGetBufferForTuple(Relation relat *** 274,280 **** * it worth keeping an accurate file length in shared memory someplace, * rather than relying on the kernel to do it for us? */ ! buffer = ReadBuffer(relation, P_NEW); /* * We can be certain that locking the otherBuffer first is OK, since it --- 325,331 ---- * it worth keeping an accurate file length in shared memory someplace, * rather than relying on the kernel to do it for us? */ ! buffer = ReadBufferBI(relation, P_NEW, bistate); /* * We can be certain that locking the otherBuffer first is OK, since it