diff options
Diffstat (limited to 'src')
37 files changed, 923 insertions, 1766 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 87a251915a5..fa77318ea3f 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.111 2004/02/06 19:36:17 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.112 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -821,7 +821,9 @@ btvacuumcleanup(PG_FUNCTION_ARGS) /* * Do the physical truncation. */ - new_pages = smgrtruncate(DEFAULT_SMGR, rel, new_pages); + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + new_pages = smgrtruncate(rel->rd_smgr, new_pages); rel->rd_nblocks = new_pages; /* update relcache * immediately */ rel->rd_targblock = InvalidBlockNumber; diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index ba3054f14bb..c92f90f6ca8 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.10 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.11 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include <unistd.h> #include "access/slru.h" +#include "storage/fd.h" #include "storage/lwlock.h" #include "miscadmin.h" diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index d5f357bc5ff..06e152d1bba 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.161 2004/01/26 22:51:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.162 2004/02/10 01:55:24 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -159,6 +159,7 @@ #include "executor/spi.h" #include "libpq/be-fsstubs.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/proc.h" #include "storage/sinval.h" #include "storage/smgr.h" diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index fe1ecd453c5..9056f0b4549 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.133 2004/01/26 22:35:31 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.134 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ #include "catalog/catversion.h" #include "catalog/pg_control.h" #include "storage/bufpage.h" +#include "storage/fd.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" #include "storage/proc.h" @@ -3126,7 +3127,6 @@ ShutdownXLOG(int code, Datum arg) MyXactMadeTempRelUpdate = false; CritSectionCount++; - CreateDummyCaches(); CreateCheckPoint(true, true); ShutdownCLOG(); CritSectionCount--; diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index d200b7e17ad..0271742ce0a 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.28 2003/12/14 00:34:47 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.29 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -261,14 +261,12 @@ _xl_remove_hash_entry(XLogRelDesc *rdesc) if (hentry == NULL) elog(PANIC, "_xl_remove_hash_entry: file was not found in cache"); - if (rdesc->reldata.rd_fd >= 0) - smgrclose(DEFAULT_SMGR, &(rdesc->reldata)); + if (rdesc->reldata.rd_smgr != NULL) + smgrclose(rdesc->reldata.rd_smgr); memset(rdesc, 0, sizeof(XLogRelDesc)); memset(tpgc, 0, sizeof(FormData_pg_class)); rdesc->reldata.rd_rel = tpgc; - - return; } static XLogRelDesc * @@ -296,7 +294,6 @@ _xl_new_reldesc(void) void XLogInitRelationCache(void) { - CreateDummyCaches(); _xl_init_rel_cache(); } @@ -306,8 +303,6 @@ XLogCloseRelationCache(void) HASH_SEQ_STATUS status; XLogRelCacheEntry *hentry; - DestroyDummyCaches(); - if (!_xlrelarr) return; @@ -347,11 +342,18 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode); - /* unexisting DB id */ - res->reldata.rd_lockInfo.lockRelId.dbId = RecoveryDb; - res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode; res->reldata.rd_node = rnode; + /* + * We set up the lockRelId in case anything tries to lock the dummy + * relation. Note that this is fairly bogus since relNode may be + * different from the relation's OID. It shouldn't really matter + * though, since we are presumably running by ourselves and can't + * have any lock conflicts ... + */ + res->reldata.rd_lockInfo.lockRelId.dbId = rnode.tblNode; + res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode; + hentry = (XLogRelCacheEntry *) hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found); @@ -364,9 +366,17 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) hentry->rdesc = res; res->reldata.rd_targblock = InvalidBlockNumber; - res->reldata.rd_fd = -1; - res->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(res->reldata), - true /* allow failure */ ); + res->reldata.rd_smgr = smgropen(res->reldata.rd_node); + /* + * Create the target file if it doesn't already exist. This lets + * us cope if the replay sequence contains writes to a relation + * that is later deleted. (The original coding of this routine + * would instead return NULL, causing the writes to be suppressed. + * But that seems like it risks losing valuable data if the filesystem + * loses an inode during a crash. Better to write the data until we + * are actually told to delete the file.) + */ + smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true); } res->moreRecently = &(_xlrelarr[0]); @@ -374,8 +384,5 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) _xlrelarr[0].lessRecently = res; res->lessRecently->moreRecently = res; - if (res->reldata.rd_fd < 0) /* file doesn't exist */ - return (NULL); - return (&(res->reldata)); } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 8d29134d39d..2f67061c48b 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.175 2004/01/07 18:56:25 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.176 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -465,14 +465,12 @@ BootstrapMain(int argc, char *argv[]) break; case BS_XLOG_CHECKPOINT: - CreateDummyCaches(); CreateCheckPoint(false, false); SetSavedRedoRecPtr(); /* pass redo ptr back to * postmaster */ proc_exit(0); /* done */ case BS_XLOG_BGWRITER: - CreateDummyCaches(); BufferBackgroundWriter(); proc_exit(0); /* done */ diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 14c3745e5e9..905aa5b0b22 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.257 2003/12/28 21:57:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.258 2004/02/10 01:55:24 tgl Exp $ * * * INTERFACE ROUTINES @@ -298,9 +298,9 @@ heap_create(const char *relname, void heap_storage_create(Relation rel) { - Assert(rel->rd_fd < 0); - rel->rd_fd = smgrcreate(DEFAULT_SMGR, rel); - Assert(rel->rd_fd >= 0); + Assert(rel->rd_smgr == NULL); + rel->rd_smgr = smgropen(rel->rd_node); + smgrcreate(rel->rd_smgr, rel->rd_istemp, false); } /* ---------------------------------------------------------------- @@ -1210,7 +1210,12 @@ heap_drop_with_catalog(Oid rid) */ if (rel->rd_rel->relkind != RELKIND_VIEW && rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE) - smgrunlink(DEFAULT_SMGR, rel); + { + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); + rel->rd_smgr = NULL; + } /* * Close relcache entry, but *keep* AccessExclusiveLock on the @@ -1706,7 +1711,7 @@ SetRelationNumChecks(Relation rel, int numchecks) else { /* Skip the disk update, but force relcache inval anyway */ - CacheInvalidateRelcache(RelationGetRelid(rel)); + CacheInvalidateRelcache(rel); } heap_freetuple(reltup); @@ -1943,7 +1948,9 @@ RelationTruncateIndexes(Oid heapId) DropRelationBuffers(currentIndex); /* Now truncate the actual data and set blocks to zero */ - smgrtruncate(DEFAULT_SMGR, currentIndex, 0); + if (currentIndex->rd_smgr == NULL) + currentIndex->rd_smgr = smgropen(currentIndex->rd_node); + smgrtruncate(currentIndex->rd_smgr, 0); currentIndex->rd_nblocks = 0; currentIndex->rd_targblock = InvalidBlockNumber; @@ -1990,7 +1997,9 @@ heap_truncate(Oid rid) DropRelationBuffers(rel); /* Now truncate the actual data and set blocks to zero */ - smgrtruncate(DEFAULT_SMGR, rel, 0); + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + smgrtruncate(rel->rd_smgr, 0); rel->rd_nblocks = 0; rel->rd_targblock = InvalidBlockNumber; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 4180526301a..9c92f217409 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.226 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.227 2004/02/10 01:55:24 tgl Exp $ * * * INTERFACE ROUTINES @@ -808,7 +808,11 @@ index_drop(Oid indexId) if (i < 0) elog(ERROR, "FlushRelationBuffers returned %d", i); - smgrunlink(DEFAULT_SMGR, userIndexRelation); + if (userIndexRelation->rd_smgr == NULL) + userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node); + smgrscheduleunlink(userIndexRelation->rd_smgr, + userIndexRelation->rd_istemp); + userIndexRelation->rd_smgr = NULL; /* * We are presently too lazy to attempt to compute the new correct @@ -818,7 +822,7 @@ index_drop(Oid indexId) * owning relation to ensure other backends update their relcache * lists of indexes. */ - CacheInvalidateRelcache(heapId); + CacheInvalidateRelcache(userHeapRelation); /* * Close rels, but keep locks @@ -1057,7 +1061,7 @@ setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid) else { /* no need to change tuple, but force relcache rebuild anyway */ - CacheInvalidateRelcache(relid); + CacheInvalidateRelcacheByTuple(tuple); } if (!pg_class_scan) @@ -1077,10 +1081,11 @@ void setNewRelfilenode(Relation relation) { Oid newrelfilenode; + RelFileNode newrnode; + SMgrRelation srel; Relation pg_class; HeapTuple tuple; Form_pg_class rd_rel; - RelationData workrel; /* Can't change relfilenode for nailed tables (indexes ok though) */ Assert(!relation->rd_isnailed || @@ -1107,14 +1112,18 @@ setNewRelfilenode(Relation relation) /* create another storage file. Is it a little ugly ? */ /* NOTE: any conflict in relfilenode value will be caught here */ - memcpy((char *) &workrel, relation, sizeof(RelationData)); - workrel.rd_fd = -1; - workrel.rd_node.relNode = newrelfilenode; - heap_storage_create(&workrel); - smgrclose(DEFAULT_SMGR, &workrel); + newrnode = relation->rd_node; + newrnode.relNode = newrelfilenode; + + srel = smgropen(newrnode); + smgrcreate(srel, relation->rd_istemp, false); + smgrclose(srel); /* schedule unlinking old relfilenode */ - smgrunlink(DEFAULT_SMGR, relation); + if (relation->rd_smgr == NULL) + relation->rd_smgr = smgropen(relation->rd_node); + smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp); + relation->rd_smgr = NULL; /* update the pg_class row */ rd_rel->relfilenode = newrelfilenode; @@ -1672,7 +1681,9 @@ reindex_index(Oid indexId) DropRelationBuffers(iRel); /* Now truncate the actual data and set blocks to zero */ - smgrtruncate(DEFAULT_SMGR, iRel, 0); + if (iRel->rd_smgr == NULL) + iRel->rd_smgr = smgropen(iRel->rd_node); + smgrtruncate(iRel->rd_smgr, 0); iRel->rd_nblocks = 0; iRel->rd_targblock = InvalidBlockNumber; } diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 66850d32d56..7af8200e063 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.217 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.218 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,6 +40,7 @@ #include "parser/parse_coerce.h" #include "parser/parse_relation.h" #include "rewrite/rewriteHandler.h" +#include "storage/fd.h" #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/acl.h" diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 2b8fdb9a2d4..85f49537efc 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.130 2004/01/07 18:56:25 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.131 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ #include "commands/comment.h" #include "commands/dbcommands.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/freespace.h" #include "storage/sinval.h" #include "utils/acl.h" diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 809f425bc65..6fadd0d4e15 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.97 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.98 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1010,7 +1010,7 @@ setRelhassubclassInRelation(Oid relationId, bool relhassubclass) else { /* no need to change tuple, but force relcache rebuild anyway */ - CacheInvalidateRelcache(relationId); + CacheInvalidateRelcacheByTuple(tuple); } heap_freetuple(tuple); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 34cfc4d10e3..bddf3f5ad68 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.163 2003/11/29 19:51:47 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.164 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -691,7 +691,7 @@ renametrig(Oid relid, * relcache entries. (Ideally this should happen * automatically...) */ - CacheInvalidateRelcache(relid); + CacheInvalidateRelcache(targetrel); } else { diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index d2d1a3c7a95..9352aeb0ec0 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.136 2004/02/02 17:21:07 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.137 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,6 +27,7 @@ #include "commands/user.h" #include "libpq/crypt.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/pmsignal.h" #include "utils/acl.h" #include "utils/array.h" diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index dae8c3f37cd..29a2df1ef1d 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.271 2004/01/07 18:56:25 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.272 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2513,7 +2513,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, /* truncate relation, if needed */ if (blkno < nblocks) { - blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno); + if (onerel->rd_smgr == NULL) + onerel->rd_smgr = smgropen(onerel->rd_node); + blkno = smgrtruncate(onerel->rd_smgr, blkno); onerel->rd_nblocks = blkno; /* update relcache immediately */ onerel->rd_targblock = InvalidBlockNumber; vacrelstats->rel_pages = blkno; /* set new number of blocks */ @@ -2582,7 +2584,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages) (errmsg("\"%s\": truncated %u to %u pages", RelationGetRelationName(onerel), vacrelstats->rel_pages, relblocks))); - relblocks = smgrtruncate(DEFAULT_SMGR, onerel, relblocks); + if (onerel->rd_smgr == NULL) + onerel->rd_smgr = smgropen(onerel->rd_node); + relblocks = smgrtruncate(onerel->rd_smgr, relblocks); onerel->rd_nblocks = relblocks; /* update relcache immediately */ onerel->rd_targblock = InvalidBlockNumber; vacrelstats->rel_pages = relblocks; /* set new number of diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index c2711528770..17f91efef70 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.35 2004/02/06 19:36:17 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.36 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -148,9 +148,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) vac_open_indexes(onerel, &nindexes, &Irel); hasindex = (nindexes > 0); - /* Turn on vacuum cost accounting */ - if (VacuumCostNaptime > 0) - VacuumCostActive = true; + /* Turn vacuum cost accounting on or off */ + VacuumCostActive = (VacuumCostNaptime > 0); VacuumCostBalance = 0; /* Do the vacuuming */ @@ -784,7 +783,9 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) /* * Do the physical truncation. */ - new_rel_pages = smgrtruncate(DEFAULT_SMGR, onerel, new_rel_pages); + if (onerel->rd_smgr == NULL) + onerel->rd_smgr = smgropen(onerel->rd_node); + new_rel_pages = smgrtruncate(onerel->rd_smgr, new_rel_pages); onerel->rd_nblocks = new_rel_pages; /* update relcache immediately */ onerel->rd_targblock = InvalidBlockNumber; vacrelstats->rel_pages = new_rel_pages; /* save new number of diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c index aa8ba2f884a..ed19e76db2c 100644 --- a/src/backend/libpq/be-fsstubs.c +++ b/src/backend/libpq/be-fsstubs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.69 2003/11/29 19:51:49 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.70 2004/02/10 01:55:25 tgl Exp $ * * NOTES * This should be moved to a more appropriate place. It is here @@ -41,6 +41,7 @@ #include "libpq/be-fsstubs.h" #include "libpq/libpq-fs.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/large_object.h" #include "utils/memutils.h" diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index f1cbe96fd2a..995afe5509e 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.92 2004/01/14 23:01:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.93 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -480,7 +480,12 @@ DefineQueryRewrite(RuleStmt *stmt) * XXX what about getting rid of its TOAST table? For now, we don't. */ if (RelisBecomingView) - smgrunlink(DEFAULT_SMGR, event_relation); + { + if (event_relation->rd_smgr == NULL) + event_relation->rd_smgr = smgropen(event_relation->rd_node); + smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp); + event_relation->rd_smgr = NULL; + } /* Close rel, but keep lock till commit... */ heap_close(event_relation, NoLock); diff --git a/src/backend/rewrite/rewriteSupport.c b/src/backend/rewrite/rewriteSupport.c index 54fdcfcddeb..6e01de4b5cb 100644 --- a/src/backend/rewrite/rewriteSupport.c +++ b/src/backend/rewrite/rewriteSupport.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.57 2003/11/29 19:51:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.58 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -84,7 +84,7 @@ SetRelationRuleStatus(Oid relationId, bool relHasRules, else { /* no need to change tuple, but force relcache rebuild anyway */ - CacheInvalidateRelcache(relationId); + CacheInvalidateRelcacheByTuple(tuple); } heap_freetuple(tuple); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b927b5ea5e7..203e03ab059 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.156 2004/02/06 19:36:18 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.157 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,7 +85,7 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum, bool bufferLockHeld); static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); -static bool BufferReplace(BufferDesc *bufHdr); +static void BufferReplace(BufferDesc *bufHdr); #ifdef NOT_USED void PrintBufferDescs(void); @@ -127,7 +127,6 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, bool bufferLockHeld) { BufferDesc *bufHdr; - int status; bool found; bool isExtend; bool isLocalBuf; @@ -135,6 +134,10 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, isExtend = (blockNum == P_NEW); isLocalBuf = reln->rd_istemp; + /* Open it at the smgr level if not already done */ + if (reln->rd_smgr == NULL) + reln->rd_smgr = smgropen(reln->rd_node); + if (isLocalBuf) { ReadLocalBufferCount++; @@ -160,7 +163,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, if (isExtend) { /* must be sure we have accurate file length! */ - blockNum = reln->rd_nblocks = smgrnblocks(DEFAULT_SMGR, reln); + blockNum = reln->rd_nblocks = smgrnblocks(reln->rd_smgr); reln->rd_nblocks++; } @@ -207,23 +210,19 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, } /* - * if we have gotten to this point, the reln pointer must be ok and - * the relation file must be open. + * if we have gotten to this point, the relation must be open in the smgr. */ if (isExtend) { /* new buffers are zero-filled */ MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - status = smgrextend(DEFAULT_SMGR, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); + smgrextend(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data)); } else { - status = smgrread(DEFAULT_SMGR, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); + smgrread(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data)); /* check for garbage data */ - if (status == SM_SUCCESS && - !PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data))) + if (!PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data))) { /* * During WAL recovery, the first access to any data page should @@ -250,47 +249,20 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, if (isLocalBuf) { /* No shared buffer state to update... */ - if (status == SM_FAIL) - { - bufHdr->flags |= BM_IO_ERROR; - return InvalidBuffer; - } return BufferDescriptorGetBuffer(bufHdr); } /* lock buffer manager again to update IO IN PROGRESS */ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); - if (status == SM_FAIL) - { - /* IO Failed. cleanup the data structures and go home */ - StrategyInvalidateBuffer(bufHdr); - - /* remember that BufferAlloc() pinned the buffer */ - UnpinBuffer(bufHdr); - - /* - * Have to reset the flag so that anyone waiting for the buffer - * can tell that the contents are invalid. - */ - bufHdr->flags |= BM_IO_ERROR; - bufHdr->flags &= ~BM_IO_IN_PROGRESS; - } - else - { - /* IO Succeeded. clear the flags, finish buffer update */ - - bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); - } + /* IO Succeeded. clear the flags, finish buffer update */ + bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); /* If anyone was waiting for IO to complete, wake them up now */ TerminateBufferIO(bufHdr); LWLockRelease(BufMgrLock); - if (status == SM_FAIL) - return InvalidBuffer; - return BufferDescriptorGetBuffer(bufHdr); } @@ -391,8 +363,6 @@ BufferAlloc(Relation reln, if (buf->flags & BM_DIRTY || buf->cntxDirty) { - bool replace_ok; - /* * skip write error buffers */ @@ -425,39 +395,21 @@ BufferAlloc(Relation reln, * Write the buffer out, being careful to release BufMgrLock * before starting the I/O. */ - replace_ok = BufferReplace(buf); + BufferReplace(buf); - if (replace_ok == false) + /* + * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't + * be set by anyone. - vadim 01/17/97 + */ + if (buf->flags & BM_JUST_DIRTIED) { - ereport(WARNING, - (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u", - buf->tag.blockNum, - buf->tag.rnode.tblNode, - buf->tag.rnode.relNode))); - inProgress = FALSE; - buf->flags |= BM_IO_ERROR; - buf->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(buf); - UnpinBuffer(buf); - buf = NULL; + elog(PANIC, "content of block %u of %u/%u changed while flushing", + buf->tag.blockNum, + buf->tag.rnode.tblNode, buf->tag.rnode.relNode); } - else - { - /* - * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't - * be set by anyone. - vadim 01/17/97 - */ - if (buf->flags & BM_JUST_DIRTIED) - { - elog(PANIC, "content of block %u of %u/%u changed while flushing", - buf->tag.blockNum, - buf->tag.rnode.tblNode, buf->tag.rnode.relNode); - } - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; - } + buf->flags &= ~BM_DIRTY; + buf->cntxDirty = false; /* * Somebody could have pinned the buffer while we were doing @@ -721,10 +673,8 @@ BufferSync(int percent, int maxpages) for (i = 0; i < num_buffer_dirty; i++) { Buffer buffer; - int status; - RelFileNode rnode; XLogRecPtr recptr; - Relation reln; + SMgrRelation reln; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); @@ -775,16 +725,10 @@ BufferSync(int percent, int maxpages) StartBufferIO(bufHdr, false); /* output IO start */ buffer = BufferDescriptorGetBuffer(bufHdr); - rnode = bufHdr->tag.rnode; LWLockRelease(BufMgrLock); /* - * Try to find relation for buffer - */ - reln = RelationNodeCacheGetRelation(rnode); - - /* * Protect buffer content against concurrent update */ LockBuffer(buffer, BUFFER_LOCK_SHARE); @@ -805,27 +749,13 @@ BufferSync(int percent, int maxpages) bufHdr->flags &= ~BM_JUST_DIRTIED; LWLockRelease(BufMgrLock); - if (reln == NULL) - { - status = smgrblindwrt(DEFAULT_SMGR, - bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - status = smgrwrite(DEFAULT_SMGR, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } + /* Find smgr relation for buffer */ + reln = smgropen(bufHdr->tag.rnode); - if (status == SM_FAIL) /* disk failure ?! */ - ereport(PANIC, - (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u", - bufHdr->tag.blockNum, - bufHdr->tag.rnode.tblNode, - bufHdr->tag.rnode.relNode))); + /* And write... */ + smgrwrite(reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); /* * Note that it's safe to change cntxDirty here because of we @@ -853,10 +783,6 @@ BufferSync(int percent, int maxpages) bufHdr->flags &= ~BM_DIRTY; UnpinBuffer(bufHdr); LWLockRelease(BufMgrLock); - - /* drop refcnt obtained by RelationNodeCacheGetRelation */ - if (reln != NULL) - RelationDecrementReferenceCount(reln); } pfree(buffer_dirty); @@ -1026,13 +952,23 @@ BufferBackgroundWriter(void) n = BufferSync(BgWriterPercent, BgWriterMaxpages); /* - * Whatever signal is sent to us, let's just die galantly. If + * Whatever signal is sent to us, let's just die gallantly. If * it wasn't meant that way, the postmaster will reincarnate us. */ if (InterruptPending) return; /* + * Whenever we have nothing to do, close all smgr files. This + * is so we won't hang onto smgr references to deleted files + * indefinitely. XXX this is a bogus, temporary solution. 'Twould + * be much better to do this once per checkpoint, but the bgwriter + * doesn't yet know anything about checkpoints. + */ + if (n == 0) + smgrcloseall(); + + /* * Nap for the configured time or sleep for 10 seconds if * there was nothing to do at all. */ @@ -1073,17 +1009,15 @@ BufferGetBlockNumber(Buffer buffer) /* * BufferReplace * - * Write out the buffer corresponding to 'bufHdr'. Returns 'true' if - * the buffer was successfully written out, 'false' otherwise. + * Write out the buffer corresponding to 'bufHdr'. * * BufMgrLock must be held at entry, and the buffer must be pinned. */ -static bool +static void BufferReplace(BufferDesc *bufHdr) { - Relation reln; + SMgrRelation reln; XLogRecPtr recptr; - int status; ErrorContextCallback errcontext; /* To check if block content changed while flushing. - vadim 01/17/97 */ @@ -1104,36 +1038,20 @@ BufferReplace(BufferDesc *bufHdr) recptr = BufferGetLSN(bufHdr); XLogFlush(recptr); - reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode); + /* Find smgr relation for buffer */ + reln = smgropen(bufHdr->tag.rnode); - if (reln != NULL) - { - status = smgrwrite(DEFAULT_SMGR, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ - if (reln != NULL) - RelationDecrementReferenceCount(reln); + /* And write... */ + smgrwrite(reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); /* Pop the error context stack */ error_context_stack = errcontext.previous; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); - if (status == SM_FAIL) - return false; - BufferFlushCount++; - - return true; } /* @@ -1151,12 +1069,17 @@ RelationGetNumberOfBlocks(Relation relation) * * Don't call smgr on a view or a composite type, either. */ - if (relation->rd_rel->relkind == RELKIND_VIEW) - relation->rd_nblocks = 0; - else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + if (relation->rd_rel->relkind == RELKIND_VIEW || + relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) relation->rd_nblocks = 0; else if (!relation->rd_isnew && !relation->rd_istemp) - relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation); + { + /* Open it at the smgr level if not already done */ + if (relation->rd_smgr == NULL) + relation->rd_smgr = smgropen(relation->rd_node); + + relation->rd_nblocks = smgrnblocks(relation->rd_smgr); + } return relation->rd_nblocks; } @@ -1172,12 +1095,17 @@ RelationGetNumberOfBlocks(Relation relation) void RelationUpdateNumberOfBlocks(Relation relation) { - if (relation->rd_rel->relkind == RELKIND_VIEW) - relation->rd_nblocks = 0; - else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + if (relation->rd_rel->relkind == RELKIND_VIEW || + relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) relation->rd_nblocks = 0; else - relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation); + { + /* Open it at the smgr level if not already done */ + if (relation->rd_smgr == NULL) + relation->rd_smgr = smgropen(relation->rd_node); + + relation->rd_nblocks = smgrnblocks(relation->rd_smgr); + } } /* --------------------------------------------------------------------- @@ -1465,7 +1393,6 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) int i; BufferDesc *bufHdr; XLogRecPtr recptr; - int status; ErrorContextCallback errcontext; /* Setup error traceback support for ereport() */ @@ -1484,17 +1411,13 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) { if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { - status = smgrwrite(DEFAULT_SMGR, rel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - if (status == SM_FAIL) - { - error_context_stack = errcontext.previous; - elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is dirty, could not flush it", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum); - return (-1); - } + /* Open it at the smgr level if not already done */ + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + + smgrwrite(rel->rd_smgr, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); bufHdr->cntxDirty = false; } @@ -1553,17 +1476,13 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) LWLockRelease(BufMgrLock); - status = smgrwrite(DEFAULT_SMGR, rel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); + /* Open it at the smgr level if not already done */ + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); - if (status == SM_FAIL) /* disk failure ?! */ - ereport(PANIC, - (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u", - bufHdr->tag.blockNum, - bufHdr->tag.rnode.tblNode, - bufHdr->tag.rnode.relNode))); + smgrwrite(rel->rd_smgr, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); BufferFlushCount++; @@ -2046,7 +1965,11 @@ AbortBufferIO(void) LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(buf->flags & BM_IO_IN_PROGRESS); if (IsForInput) + { Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty)); + /* Don't think that buffer is valid */ + StrategyInvalidateBuffer(buf); + } else { Assert(buf->flags & BM_DIRTY || buf->cntxDirty); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 01c83039280..bcbedc9c651 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.51 2004/01/07 18:56:27 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.52 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -90,24 +90,15 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) */ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { - Relation bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode); + SMgrRelation reln; - /* flush this page */ - if (bufrel == NULL) - { - smgrblindwrt(DEFAULT_SMGR, - bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - smgrwrite(DEFAULT_SMGR, bufrel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - /* drop refcount incremented by RelationNodeCacheGetRelation */ - RelationDecrementReferenceCount(bufrel); - } + /* Find smgr relation for buffer */ + reln = smgropen(bufHdr->tag.rnode); + + /* And write... */ + smgrwrite(reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); LocalBufferFlushCount++; } @@ -143,9 +134,6 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) /* * it's all ours now. - * - * We need not in tblNode currently but will in future I think, when - * we'll give up rel->rd_fd to fmgr cache. */ bufHdr->tag.rnode = reln->rd_node; bufHdr->tag.blockNum = blockNum; diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 7de7d85e74d..f95b1b34410 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.63 2004/01/26 22:59:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.64 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -66,9 +66,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, #ifdef EXEC_BACKEND size += ShmemBackendArraySize(); #endif -#ifdef STABLE_MEMORY_STORAGE - size += MMShmemSize(); -#endif size += 100000; /* might as well round it off to a multiple of a typical page size */ size += 8192 - (size % 8192); diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile index 7c2a0f62b2a..71695f9a749 100644 --- a/src/backend/storage/smgr/Makefile +++ b/src/backend/storage/smgr/Makefile @@ -4,7 +4,7 @@ # Makefile for storage/smgr # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.14 2003/11/29 19:51:57 pgsql Exp $ +# $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.15 2004/02/10 01:55:26 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/storage/smgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = md.o mm.o smgr.o smgrtype.o +OBJS = md.o smgr.o smgrtype.o all: SUBSYS.o diff --git a/src/backend/storage/smgr/README b/src/backend/storage/smgr/README index 606431f926f..124d5bcdffc 100644 --- a/src/backend/storage/smgr/README +++ b/src/backend/storage/smgr/README @@ -1,40 +1,31 @@ -# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.2 2003/11/29 19:51:57 pgsql Exp $ +# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.3 2004/02/10 01:55:26 tgl Exp $ -This directory contains the code that supports the Postgres storage manager -switch and all of the installed storage managers. In released systems, -the only supported storage manager is the magnetic disk manager. At UC -Berkeley, the Sony WORM optical disk jukebox and persistent main memory are -also supported. +In the original Berkeley Postgres system, there were several storage managers, +of which only the "magnetic disk" manager remains. (At Berkeley there were +also managers for the Sony WORM optical disk jukebox and persistent main +memory, but these were never supported in any externally released Postgres, +nor in any version of PostgreSQL.) However, we retain the notion of a storage +manager switch in case anyone wants to reintroduce other kinds of storage +managers. -As of Postgres Release 3.0, every relation in the system is tagged with the -storage manager on which it resides. The storage manager switch code turns -what used to by filesystem operations into operations on the correct store, -for any given relation. +In Berkeley Postgres each relation was tagged with the ID of the storage +manager to use for it. This is gone. It would be more reasonable to +associate storage managers with tablespaces (a feature not present as this +text is being written, but one likely to emerge soon). The files in this directory, and their contents, are smgrtype.c Storage manager type -- maps string names to storage manager IDs and provides simple comparison operators. This is the regproc support for type 'smgr' in the system catalogs. + (This is vestigial since no columns of type smgr exist + in the catalogs anymore.) smgr.c The storage manager switch dispatch code. The routines in this file call the appropriate storage manager to do hardware - accesses requested by the backend. + accesses requested by the backend. smgr.c also manages the + file handle cache (SMgrRelation table). md.c The magnetic disk storage manager. - mm.c The persistent main memory storage manager (#undef'ed in - tmp/c.h for all distributed systems). - - sj.c The sony jukebox storage manager and cache management code - (#undef'ed in tmp/c.h for all distributed systems). The - routines in this file allocate extents, maintain block - maps, and guarantee the persistence and coherency of a cache - of jukebox blocks on magnetic disk. - - pgjb.c The postgres jukebox interface routines. The routines here - handle exclusion on the physical device and translate requests - from the storage manager code (sj.c) into jbaccess calls. - - jbaccess.c Access code for the physical Sony jukebox device. This code - was swiped from Andy McFadden's jblib.a code at UC Berkeley. +Note that md.c in turn relies on src/backend/storage/file/fd.c. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 0405c2849a0..58629218a3c 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.101 2004/01/07 18:56:27 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.102 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,108 +21,81 @@ #include "catalog/catalog.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/smgr.h" -#include "utils/inval.h" #include "utils/memutils.h" + /* * The magnetic disk storage manager keeps track of open file * descriptors in its own descriptor pool. This is done to make it * easier to support relations that are larger than the operating - * system's file size limit (often 2GBytes). In order to do that, we + * system's file size limit (often 2GBytes). In order to do that, * we break relations up into chunks of < 2GBytes and store one chunk * in each of several files that represent the relation. See the * BLCKSZ and RELSEG_SIZE configuration constants in - * include/pg_config.h. + * include/pg_config.h. All chunks except the last MUST have size exactly + * equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate(). * - * The file descriptor stored in the relation cache (see RelationGetFile()) - * is actually an index into the Md_fdvec array. -1 indicates not open. + * The file descriptor pointer (md_fd field) stored in the SMgrRelation + * cache is, therefore, just the head of a list of MdfdVec objects. + * But note the md_fd pointer can be NULL, indicating relation not open. * - * When a relation is broken into multiple chunks, only the first chunk - * has its own entry in the Md_fdvec array; the remaining chunks have - * palloc'd MdfdVec objects that are chained onto the first chunk via the - * mdfd_chain links. All chunks except the last MUST have size exactly - * equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate(). + * All MdfdVec objects are palloc'd in the MdCxt memory context. */ typedef struct _MdfdVec { - int mdfd_vfd; /* fd number in vfd pool */ - int mdfd_flags; /* fd status flags */ + File mdfd_vfd; /* fd number in fd.c's pool */ -/* these are the assigned bits in mdfd_flags: */ -#define MDFD_FREE (1 << 0) /* unused entry */ - - int mdfd_nextFree; /* link to next freelist member, if free */ #ifndef LET_OS_MANAGE_FILESIZE struct _MdfdVec *mdfd_chain; /* for large relations */ #endif } MdfdVec; -static int Nfds = 100; /* initial/current size of Md_fdvec array */ -static MdfdVec *Md_fdvec = NULL; -static int Md_Free = -1; /* head of freelist of unused fdvec - * entries */ -static int CurFd = 0; /* first never-used fdvec index */ static MemoryContext MdCxt; /* context for all md.c allocations */ -/* routines declared here */ -static void mdclose_fd(int fd); -static int _mdfd_getrelnfd(Relation reln); -static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags); -static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno); - -static int _mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno); -static int _fdvec_alloc(void); -static void _fdvec_free(int); +/* routines declared here */ +static MdfdVec *mdopen(SMgrRelation reln); +static MdfdVec *_fdvec_alloc(void); +#ifndef LET_OS_MANAGE_FILESIZE +static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, + int oflags); +#endif +static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno); static BlockNumber _mdnblocks(File file, Size blcksz); + /* * mdinit() -- Initialize private state for magnetic disk storage manager. - * - * We keep a private table of all file descriptors. This routine - * allocates and initializes the table. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ -int +bool mdinit(void) { - int i; - MdCxt = AllocSetContextCreate(TopMemoryContext, "MdSmgr", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - Md_fdvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec)); - - MemSet(Md_fdvec, 0, Nfds * sizeof(MdfdVec)); - - /* Set free list */ - for (i = 0; i < Nfds; i++) - { - Md_fdvec[i].mdfd_nextFree = i + 1; - Md_fdvec[i].mdfd_flags = MDFD_FREE; - } - Md_Free = 0; - Md_fdvec[Nfds - 1].mdfd_nextFree = -1; - - return SM_SUCCESS; + return true; } -int -mdcreate(Relation reln) +/* + * mdcreate() -- Create a new relation on magnetic disk. + * + * If isRedo is true, it's okay for the relation to exist already. + */ +bool +mdcreate(SMgrRelation reln, bool isRedo) { char *path; - int fd, - vfd; + File fd; - Assert(reln->rd_fd < 0); + Assert(reln->md_fd == NULL); - path = relpath(reln->rd_node); + path = relpath(reln->smgr_rnode); fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); @@ -134,43 +107,45 @@ mdcreate(Relation reln) * During bootstrap, there are cases where a system relation will * be accessed (by internal backend processes) before the * bootstrap script nominally creates it. Therefore, allow the - * file to exist already, but in bootstrap mode only. (See also + * file to exist already, even if isRedo is not set. (See also * mdopen) */ - if (IsBootstrapProcessingMode()) + if (isRedo || IsBootstrapProcessingMode()) fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); if (fd < 0) { pfree(path); /* be sure to return the error reported by create, not open */ errno = save_errno; - return -1; + return false; } errno = 0; } pfree(path); - vfd = _fdvec_alloc(); - if (vfd < 0) - return -1; + reln->md_fd = _fdvec_alloc(); - Md_fdvec[vfd].mdfd_vfd = fd; - Md_fdvec[vfd].mdfd_flags = (uint16) 0; + reln->md_fd->mdfd_vfd = fd; #ifndef LET_OS_MANAGE_FILESIZE - Md_fdvec[vfd].mdfd_chain = NULL; + reln->md_fd->mdfd_chain = NULL; #endif - return vfd; + return true; } /* * mdunlink() -- Unlink a relation. + * + * Note that we're passed a RelFileNode --- by the time this is called, + * there won't be an SMgrRelation hashtable entry anymore. + * + * If isRedo is true, it's okay for the relation to be already gone. */ -int -mdunlink(RelFileNode rnode) +bool +mdunlink(RelFileNode rnode, bool isRedo) { - int status = SM_SUCCESS; + bool status = true; int save_errno = 0; char *path; @@ -179,13 +154,16 @@ mdunlink(RelFileNode rnode) /* Delete the first segment, or only segment if not doing segmenting */ if (unlink(path) < 0) { - status = SM_FAIL; - save_errno = errno; + if (!isRedo || errno != ENOENT) + { + status = false; + save_errno = errno; + } } #ifndef LET_OS_MANAGE_FILESIZE /* Get the additional segments, if any */ - if (status == SM_SUCCESS) + if (status) { char *segpath = (char *) palloc(strlen(path) + 12); BlockNumber segno; @@ -198,7 +176,7 @@ mdunlink(RelFileNode rnode) /* ENOENT is expected after the last segment... */ if (errno != ENOENT) { - status = SM_FAIL; + status = false; save_errno = errno; } break; @@ -222,16 +200,15 @@ mdunlink(RelFileNode rnode) * relation (ie, blocknum is the current EOF), and so in case of * failure we clean up by truncating. * - * This routine returns SM_FAIL or SM_SUCCESS, with errno set as - * appropriate. + * This routine returns true or false, with errno set as appropriate. * * Note: this routine used to call mdnblocks() to get the block position * to write at, but that's pretty silly since the caller needs to know where * the block will be written, and accordingly must have done mdnblocks() * already. Might as well pass in the position and save a seek. */ -int -mdextend(Relation reln, BlockNumber blocknum, char *buffer) +bool +mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer) { long seekpos; int nbytes; @@ -256,7 +233,7 @@ mdextend(Relation reln, BlockNumber blocknum, char *buffer) * to make room for the new page's buffer. */ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; + return false; if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { @@ -269,29 +246,32 @@ mdextend(Relation reln, BlockNumber blocknum, char *buffer) FileSeek(v->mdfd_vfd, seekpos, SEEK_SET); errno = save_errno; } - return SM_FAIL; + return false; } #ifndef LET_OS_MANAGE_FILESIZE Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); #endif - return SM_SUCCESS; + return true; } /* - * mdopen() -- Open the specified relation. + * mdopen() -- Open the specified relation. ereport's on failure. + * + * Note we only open the first segment, when there are multiple segments. */ -int -mdopen(Relation reln) +static MdfdVec * +mdopen(SMgrRelation reln) { char *path; - int fd; - int vfd; + File fd; - Assert(reln->rd_fd < 0); + /* No work if already open */ + if (reln->md_fd) + return reln->md_fd; - path = relpath(reln->rd_node); + path = relpath(reln->smgr_rnode); fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); @@ -309,57 +289,45 @@ mdopen(Relation reln) if (fd < 0) { pfree(path); - return -1; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); } } pfree(path); - vfd = _fdvec_alloc(); - if (vfd < 0) - return -1; + reln->md_fd = _fdvec_alloc(); - Md_fdvec[vfd].mdfd_vfd = fd; - Md_fdvec[vfd].mdfd_flags = (uint16) 0; + reln->md_fd->mdfd_vfd = fd; #ifndef LET_OS_MANAGE_FILESIZE - Md_fdvec[vfd].mdfd_chain = NULL; + reln->md_fd->mdfd_chain = NULL; Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); #endif - return vfd; + return reln->md_fd; } /* * mdclose() -- Close the specified relation, if it isn't closed already. * - * AND FREE fd vector! It may be re-used for other relations! - * reln should be flushed from cache after closing !.. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. + * Returns true or false with errno set as appropriate. */ -int -mdclose(Relation reln) +bool +mdclose(SMgrRelation reln) { - int fd; - - fd = RelationGetFile(reln); - if (fd < 0) - return SM_SUCCESS; /* already closed, so no work */ - - mdclose_fd(fd); - - reln->rd_fd = -1; + MdfdVec *v = reln->md_fd; - return SM_SUCCESS; -} + /* No work if already closed */ + if (v == NULL) + return true; -static void -mdclose_fd(int fd) -{ - MdfdVec *v; + reln->md_fd = NULL; /* prevent dangling pointer after error */ #ifndef LET_OS_MANAGE_FILESIZE - for (v = &Md_fdvec[fd]; v != NULL;) + while (v != NULL) { MdfdVec *ov = v; @@ -368,32 +336,24 @@ mdclose_fd(int fd) FileClose(v->mdfd_vfd); /* Now free vector */ v = v->mdfd_chain; - if (ov != &Md_fdvec[fd]) - pfree(ov); + pfree(ov); } - - Md_fdvec[fd].mdfd_chain = NULL; #else - v = &Md_fdvec[fd]; - if (v != NULL) - { - if (v->mdfd_vfd >= 0) - FileClose(v->mdfd_vfd); - } + if (v->mdfd_vfd >= 0) + FileClose(v->mdfd_vfd); + pfree(v); #endif - _fdvec_free(fd); + return true; } /* * mdread() -- Read the specified block from a relation. - * - * Returns SM_SUCCESS or SM_FAIL. */ -int -mdread(Relation reln, BlockNumber blocknum, char *buffer) +bool +mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - int status; + bool status; long seekpos; int nbytes; MdfdVec *v; @@ -408,9 +368,9 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer) #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; + return false; - status = SM_SUCCESS; + status = true; if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { /* @@ -425,7 +385,7 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer) (nbytes > 0 && mdnblocks(reln) == blocknum)) MemSet(buffer, 0, BLCKSZ); else - status = SM_FAIL; + status = false; } return status; @@ -433,11 +393,9 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer) /* * mdwrite() -- Write the supplied block at the appropriate location. - * - * Returns SM_SUCCESS or SM_FAIL. */ -int -mdwrite(Relation reln, BlockNumber blocknum, char *buffer) +bool +mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer) { long seekpos; MdfdVec *v; @@ -452,69 +410,12 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer) #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; + return false; if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) - return SM_FAIL; + return false; - return SM_SUCCESS; -} - -/* - * mdblindwrt() -- Write a block to disk blind. - * - * We have to be able to do this using only the rnode of the relation - * in which the block belongs. Otherwise this is much like mdwrite(). - */ -int -mdblindwrt(RelFileNode rnode, - BlockNumber blkno, - char *buffer) -{ - int status; - long seekpos; - int fd; - - fd = _mdfd_blind_getseg(rnode, blkno); - - if (fd < 0) - return SM_FAIL; - -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blkno % ((BlockNumber) RELSEG_SIZE))); - Assert(seekpos < BLCKSZ * RELSEG_SIZE); -#else - seekpos = (long) (BLCKSZ * (blkno)); -#endif - - errno = 0; - if (lseek(fd, seekpos, SEEK_SET) != seekpos) - { - elog(LOG, "lseek(%ld) failed: %m", seekpos); - close(fd); - return SM_FAIL; - } - - status = SM_SUCCESS; - - /* write the block */ - errno = 0; - if (write(fd, buffer, BLCKSZ) != BLCKSZ) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - elog(LOG, "write() failed: %m"); - status = SM_FAIL; - } - - if (close(fd) < 0) - { - elog(LOG, "close() failed: %m"); - status = SM_FAIL; - } - - return status; + return true; } /* @@ -525,24 +426,16 @@ mdblindwrt(RelFileNode rnode, * called, then only segments up to the last one actually touched * are present in the chain... * - * Returns # of blocks, ereport's on error. + * Returns # of blocks, or InvalidBlockNumber on error. */ BlockNumber -mdnblocks(Relation reln) +mdnblocks(SMgrRelation reln) { - int fd; - MdfdVec *v; + MdfdVec *v = mdopen(reln); #ifndef LET_OS_MANAGE_FILESIZE BlockNumber nblocks; - BlockNumber segno; -#endif - - fd = _mdfd_getrelnfd(reln); - v = &Md_fdvec[fd]; - -#ifndef LET_OS_MANAGE_FILESIZE - segno = 0; + BlockNumber segno = 0; /* * Skip through any segments that aren't the last one, to avoid @@ -583,8 +476,7 @@ mdnblocks(Relation reln) */ v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); if (v->mdfd_chain == NULL) - elog(ERROR, "could not count blocks of \"%s\": %m", - RelationGetRelationName(reln)); + return InvalidBlockNumber; /* failed? */ } v = v->mdfd_chain; @@ -600,9 +492,8 @@ mdnblocks(Relation reln) * Returns # of blocks or InvalidBlockNumber on error. */ BlockNumber -mdtruncate(Relation reln, BlockNumber nblocks) +mdtruncate(SMgrRelation reln, BlockNumber nblocks) { - int fd; MdfdVec *v; BlockNumber curnblk; @@ -615,13 +506,14 @@ mdtruncate(Relation reln, BlockNumber nblocks) * that truncate/delete loop will get them all! */ curnblk = mdnblocks(reln); + if (curnblk == InvalidBlockNumber) + return InvalidBlockNumber; /* mdnblocks failed */ if (nblocks > curnblk) return InvalidBlockNumber; /* bogus request */ if (nblocks == curnblk) return nblocks; /* no work */ - fd = _mdfd_getrelnfd(reln); - v = &Md_fdvec[fd]; + v = mdopen(reln); #ifndef LET_OS_MANAGE_FILESIZE priorblocks = 0; @@ -641,7 +533,7 @@ mdtruncate(Relation reln, BlockNumber nblocks) FileTruncate(v->mdfd_vfd, 0); FileUnlink(v->mdfd_vfd); v = v->mdfd_chain; - Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st + Assert(ov != reln->md_fd); /* we never drop the 1st * segment */ pfree(ov); } @@ -682,115 +574,65 @@ mdtruncate(Relation reln, BlockNumber nblocks) /* * mdcommit() -- Commit a transaction. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ -int +bool mdcommit(void) { /* * We don't actually have to do anything here... */ - return SM_SUCCESS; + return true; } /* * mdabort() -- Abort a transaction. */ -int +bool mdabort(void) { /* * We don't actually have to do anything here... */ - return SM_SUCCESS; + return true; } /* * mdsync() -- Sync previous writes to stable storage. */ -int +bool mdsync(void) { sync(); if (IsUnderPostmaster) sleep(2); sync(); - return SM_SUCCESS; + return true; } /* - * _fdvec_alloc() -- Grab a free (or new) md file descriptor vector. + * _fdvec_alloc() -- Make a MdfdVec object. */ -static int +static MdfdVec * _fdvec_alloc(void) { - MdfdVec *nvec; - int fdvec, - i; - - if (Md_Free >= 0) /* get from free list */ - { - fdvec = Md_Free; - Md_Free = Md_fdvec[fdvec].mdfd_nextFree; - Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE); - Md_fdvec[fdvec].mdfd_flags = 0; - if (fdvec >= CurFd) - { - Assert(fdvec == CurFd); - CurFd++; - } - return fdvec; - } - - /* Must allocate more room */ - - if (Nfds != CurFd) - elog(FATAL, "_fdvec_alloc error"); - - Nfds *= 2; - - nvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec)); - MemSet(nvec, 0, Nfds * sizeof(MdfdVec)); - memcpy(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec)); - pfree(Md_fdvec); + MdfdVec *v; - Md_fdvec = nvec; - - /* Set new free list */ - for (i = CurFd; i < Nfds; i++) - { - Md_fdvec[i].mdfd_nextFree = i + 1; - Md_fdvec[i].mdfd_flags = MDFD_FREE; - } - Md_fdvec[Nfds - 1].mdfd_nextFree = -1; - Md_Free = CurFd + 1; - - fdvec = CurFd; - CurFd++; - Md_fdvec[fdvec].mdfd_flags = 0; + v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); + v->mdfd_vfd = -1; +#ifndef LET_OS_MANAGE_FILESIZE + v->mdfd_chain = NULL; +#endif - return fdvec; + return v; } +#ifndef LET_OS_MANAGE_FILESIZE /* - * _fdvec_free() -- free md file descriptor vector. - * + * Open the specified segment of the relation, + * and make a MdfdVec object for it. Returns NULL on failure. */ -static -void -_fdvec_free(int fdvec) -{ - - Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE); - Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE); - Md_fdvec[fdvec].mdfd_nextFree = Md_Free; - Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; - Md_Free = fdvec; -} - static MdfdVec * -_mdfd_openseg(Relation reln, BlockNumber segno, int oflags) +_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) { MdfdVec *v; int fd; @@ -798,7 +640,7 @@ _mdfd_openseg(Relation reln, BlockNumber segno, int oflags) *fullpath; /* be sure we have enough space for the '.segno', if any */ - path = relpath(reln->rd_node); + path = relpath(reln->smgr_rnode); if (segno > 0) { @@ -818,61 +660,32 @@ _mdfd_openseg(Relation reln, BlockNumber segno, int oflags) return NULL; /* allocate an mdfdvec entry for it */ - v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); + v = _fdvec_alloc(); /* fill the entry */ v->mdfd_vfd = fd; - v->mdfd_flags = (uint16) 0; -#ifndef LET_OS_MANAGE_FILESIZE v->mdfd_chain = NULL; Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); -#endif /* all done */ return v; } - -/* - * _mdfd_getrelnfd() -- Get the (virtual) fd for the relation, - * opening it if it's not already open - * - */ -static int -_mdfd_getrelnfd(Relation reln) -{ - int fd; - - fd = RelationGetFile(reln); - if (fd < 0) - { - if ((fd = mdopen(reln)) < 0) - elog(ERROR, "could not open relation \"%s\": %m", - RelationGetRelationName(reln)); - reln->rd_fd = fd; - } - return fd; -} +#endif /* * _mdfd_getseg() -- Find the segment of the relation holding the - * specified block - * + * specified block. ereport's on failure. */ static MdfdVec * -_mdfd_getseg(Relation reln, BlockNumber blkno) +_mdfd_getseg(SMgrRelation reln, BlockNumber blkno) { - MdfdVec *v; - int fd; + MdfdVec *v = mdopen(reln); #ifndef LET_OS_MANAGE_FILESIZE BlockNumber segno; BlockNumber i; -#endif - - fd = _mdfd_getrelnfd(reln); -#ifndef LET_OS_MANAGE_FILESIZE - for (v = &Md_fdvec[fd], segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1; + for (segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1; segno > 0; i++, segno--) { @@ -892,65 +705,24 @@ _mdfd_getseg(Relation reln, BlockNumber blkno) v->mdfd_chain = _mdfd_openseg(reln, i, (segno == 1) ? O_CREAT : 0); if (v->mdfd_chain == NULL) - elog(ERROR, "could not open segment %u of relation \"%s\" (target block %u): %m", - i, RelationGetRelationName(reln), blkno); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open segment %u of relation %u/%u (target block %u): %m", + i, + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode, + blkno))); } v = v->mdfd_chain; } -#else - v = &Md_fdvec[fd]; #endif return v; } /* - * Find the segment of the relation holding the specified block. - * - * This performs the same work as _mdfd_getseg() except that we must work - * "blind" with no Relation struct. We assume that we are not likely to - * touch the same relation again soon, so we do not create an FD entry for - * the relation --- we just open a kernel file descriptor which will be - * used and promptly closed. We also assume that the target block already - * exists, ie, we need not extend the relation. - * - * The return value is the kernel descriptor, or -1 on failure. + * Get number of blocks present in a single disk file */ -static int -_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno) -{ - char *path; - int fd; - -#ifndef LET_OS_MANAGE_FILESIZE - BlockNumber segno; -#endif - - path = relpath(rnode); - -#ifndef LET_OS_MANAGE_FILESIZE - /* append the '.segno', if needed */ - segno = blkno / ((BlockNumber) RELSEG_SIZE); - if (segno > 0) - { - char *segpath = (char *) palloc(strlen(path) + 12); - - sprintf(segpath, "%s.%u", path, segno); - pfree(path); - path = segpath; - } -#endif - - /* call fd.c to allow other FDs to be closed if needed */ - fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600); - if (fd < 0) - elog(LOG, "could not open \"%s\": %m", path); - - pfree(path); - - return fd; -} - static BlockNumber _mdnblocks(File file, Size blcksz) { diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c deleted file mode 100644 index 5043fd66a57..00000000000 --- a/src/backend/storage/smgr/mm.c +++ /dev/null @@ -1,552 +0,0 @@ -/*------------------------------------------------------------------------- - * - * mm.c - * main memory storage manager - * - * This code manages relations that reside in (presumably stable) - * main memory. - * - * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/mm.c,v 1.36 2004/01/07 18:56:27 neilc Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include <math.h> - -#include "storage/smgr.h" -#include "miscadmin.h" - - -#ifdef STABLE_MEMORY_STORAGE - -/* - * MMCacheTag -- Unique triplet for blocks stored by the main memory - * storage manager. - */ - -typedef struct MMCacheTag -{ - Oid mmct_dbid; - Oid mmct_relid; - BlockNumber mmct_blkno; -} MMCacheTag; - -/* - * Shared-memory hash table for main memory relations contains - * entries of this form. - */ - -typedef struct MMHashEntry -{ - MMCacheTag mmhe_tag; - int mmhe_bufno; -} MMHashEntry; - -/* - * MMRelTag -- Unique identifier for each relation that is stored in the - * main-memory storage manager. - */ - -typedef struct MMRelTag -{ - Oid mmrt_dbid; - Oid mmrt_relid; -} MMRelTag; - -/* - * Shared-memory hash table for # blocks in main memory relations contains - * entries of this form. - */ - -typedef struct MMRelHashEntry -{ - MMRelTag mmrhe_tag; - int mmrhe_nblocks; -} MMRelHashEntry; - -#define MMNBUFFERS 10 -#define MMNRELATIONS 2 - -static int *MMCurTop; -static int *MMCurRelno; -static MMCacheTag *MMBlockTags; -static char *MMBlockCache; -static HTAB *MMCacheHT; -static HTAB *MMRelCacheHT; - -int -mminit(void) -{ - char *mmcacheblk; - int mmsize = 0; - bool found; - HASHCTL info; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); - mmsize += MAXALIGN(sizeof(*MMCurTop)); - mmsize += MAXALIGN(sizeof(*MMCurRelno)); - mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag))); - mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found); - - if (mmcacheblk == NULL) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - info.keysize = sizeof(MMCacheTag); - info.entrysize = sizeof(MMHashEntry); - info.hash = tag_hash; - - MMCacheHT = ShmemInitHash("Main memory store HT", - MMNBUFFERS, MMNBUFFERS, - &info, (HASH_ELEM | HASH_FUNCTION)); - - if (MMCacheHT == NULL) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - info.keysize = sizeof(MMRelTag); - info.entrysize = sizeof(MMRelHashEntry); - info.hash = tag_hash; - - MMRelCacheHT = ShmemInitHash("Main memory rel HT", - MMNRELATIONS, MMNRELATIONS, - &info, (HASH_ELEM | HASH_FUNCTION)); - - if (MMRelCacheHT == NULL) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - if (IsUnderPostmaster) /* was IsPostmaster bjm */ - { - MemSet(mmcacheblk, 0, mmsize); - LWLockRelease(MMCacheLock); - return SM_SUCCESS; - } - - LWLockRelease(MMCacheLock); - - MMCurTop = (int *) mmcacheblk; - mmcacheblk += sizeof(int); - MMCurRelno = (int *) mmcacheblk; - mmcacheblk += sizeof(int); - MMBlockTags = (MMCacheTag *) mmcacheblk; - mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag)); - MMBlockCache = mmcacheblk; - - return SM_SUCCESS; -} - -int -mmshutdown(void) -{ - return SM_SUCCESS; -} - -int -mmcreate(Relation reln) -{ - MMRelHashEntry *entry; - bool found; - MMRelTag tag; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - if (*MMCurRelno == MMNRELATIONS) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - (*MMCurRelno)++; - - tag.mmrt_relid = RelationGetRelid(reln); - if (reln->rd_rel->relisshared) - tag.mmrt_dbid = (Oid) 0; - else - tag.mmrt_dbid = MyDatabaseId; - - entry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &tag, - HASH_ENTER, &found); - - if (entry == NULL) - { - LWLockRelease(MMCacheLock); - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - - if (found) - { - /* already exists */ - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - entry->mmrhe_nblocks = 0; - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmunlink() -- Unlink a relation. - * - * XXX currently broken: needs to accept RelFileNode, not Relation - */ -int -mmunlink(RelFileNode rnode) -{ - int i; - MMHashEntry *entry; - MMRelHashEntry *rentry; - MMRelTag rtag; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - for (i = 0; i < MMNBUFFERS; i++) - { - if (MMBlockTags[i].mmct_dbid == rnode.tblNode - && MMBlockTags[i].mmct_relid == rnode.relNode) - { - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &MMBlockTags[i], - HASH_REMOVE, NULL); - if (entry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "cache hash table corrupted"); - } - MMBlockTags[i].mmct_dbid = (Oid) 0; - MMBlockTags[i].mmct_relid = (Oid) 0; - MMBlockTags[i].mmct_blkno = (BlockNumber) 0; - } - } - rtag.mmrt_dbid = rnode.tblNode; - rtag.mmrt_relid = rnode.relNode; - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &rtag, - HASH_REMOVE, NULL); - - if (rentry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "rel cache hash table corrupted"); - } - - (*MMCurRelno)--; - - LWLockRelease(MMCacheLock); - return 1; -} - -/* - * mmextend() -- Add a block to the specified relation. - * - * This routine returns SM_FAIL or SM_SUCCESS, with errno set as - * appropriate. - */ -int -mmextend(Relation reln, BlockNumber blocknum, char *buffer) -{ - MMRelHashEntry *rentry; - MMHashEntry *entry; - int i; - Oid reldbid; - int offset; - bool found; - MMRelTag rtag; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - reldbid = (Oid) 0; - else - reldbid = MyDatabaseId; - - tag.mmct_dbid = rtag.mmrt_dbid = reldbid; - tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln); - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - if (*MMCurTop == MMNBUFFERS) - { - for (i = 0; i < MMNBUFFERS; i++) - { - if (MMBlockTags[i].mmct_dbid == 0 && - MMBlockTags[i].mmct_relid == 0) - break; - } - if (i == MMNBUFFERS) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - } - else - { - i = *MMCurTop; - (*MMCurTop)++; - } - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &rtag, - HASH_FIND, NULL); - if (rentry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "rel cache hash table corrupted"); - } - - tag.mmct_blkno = rentry->mmrhe_nblocks; - - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &tag, - HASH_ENTER, &found); - if (entry == NULL || found) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "cache hash table corrupted"); - } - - entry->mmhe_bufno = i; - MMBlockTags[i].mmct_dbid = reldbid; - MMBlockTags[i].mmct_relid = RelationGetRelid(reln); - MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks; - - /* page numbers are zero-based, so we increment this at the end */ - (rentry->mmrhe_nblocks)++; - - /* write the extended page */ - offset = (i * BLCKSZ); - memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmopen() -- Open the specified relation. - */ -int -mmopen(Relation reln) -{ - /* automatically successful */ - return 0; -} - -/* - * mmclose() -- Close the specified relation. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. - */ -int -mmclose(Relation reln) -{ - /* automatically successful */ - return SM_SUCCESS; -} - -/* - * mmread() -- Read the specified block from a relation. - * - * Returns SM_SUCCESS or SM_FAIL. - */ -int -mmread(Relation reln, BlockNumber blocknum, char *buffer) -{ - MMHashEntry *entry; - int offset; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - tag.mmct_dbid = (Oid) 0; - else - tag.mmct_dbid = MyDatabaseId; - - tag.mmct_relid = RelationGetRelid(reln); - tag.mmct_blkno = blocknum; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &tag, - HASH_FIND, NULL); - - if (entry == NULL) - { - /* reading nonexistent pages is defined to fill them with zeroes */ - LWLockRelease(MMCacheLock); - MemSet(buffer, 0, BLCKSZ); - return SM_SUCCESS; - } - - offset = (entry->mmhe_bufno * BLCKSZ); - memmove(buffer, &MMBlockCache[offset], BLCKSZ); - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmwrite() -- Write the supplied block at the appropriate location. - * - * Returns SM_SUCCESS or SM_FAIL. - */ -int -mmwrite(Relation reln, BlockNumber blocknum, char *buffer) -{ - MMHashEntry *entry; - int offset; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - tag.mmct_dbid = (Oid) 0; - else - tag.mmct_dbid = MyDatabaseId; - - tag.mmct_relid = RelationGetRelid(reln); - tag.mmct_blkno = blocknum; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &tag, - HASH_FIND, NULL); - - if (entry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "cache hash table missing requested page"); - } - - offset = (entry->mmhe_bufno * BLCKSZ); - memmove(&MMBlockCache[offset], buffer, BLCKSZ); - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmblindwrt() -- Write a block to stable storage blind. - * - * We have to be able to do this using only the rnode of the relation - * in which the block belongs. Otherwise this is much like mmwrite(). - */ -int -mmblindwrt(RelFileNode rnode, - BlockNumber blkno, - char *buffer) -{ - return SM_FAIL; -} - -/* - * mmnblocks() -- Get the number of blocks stored in a relation. - * - * Returns # of blocks or InvalidBlockNumber on error. - */ -BlockNumber -mmnblocks(Relation reln) -{ - MMRelTag rtag; - MMRelHashEntry *rentry; - BlockNumber nblocks; - - if (reln->rd_rel->relisshared) - rtag.mmrt_dbid = (Oid) 0; - else - rtag.mmrt_dbid = MyDatabaseId; - - rtag.mmrt_relid = RelationGetRelid(reln); - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &rtag, - HASH_FIND, NULL); - - if (rentry) - nblocks = rentry->mmrhe_nblocks; - else - nblocks = InvalidBlockNumber; - - LWLockRelease(MMCacheLock); - - return nblocks; -} - -/* - * mmcommit() -- Commit a transaction. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. - */ -int -mmcommit(void) -{ - return SM_SUCCESS; -} - -/* - * mmabort() -- Abort a transaction. - */ - -int -mmabort(void) -{ - return SM_SUCCESS; -} - -/* - * MMShmemSize() -- Declare amount of shared memory we require. - * - * The shared memory initialization code creates a block of shared - * memory exactly big enough to hold all the structures it needs to. - * This routine declares how much space the main memory storage - * manager will use. - */ -int -MMShmemSize(void) -{ - int size = 0; - - /* - * first compute space occupied by the (dbid,relid,blkno) hash table - */ - size += hash_estimate_size(MMNBUFFERS, sizeof(MMHashEntry)); - - /* - * now do the same for the rel hash table - */ - size += hash_estimate_size(MMNRELATIONS, sizeof(MMRelHashEntry)); - - /* - * finally, add in the memory block we use directly - */ - - size += MAXALIGN(BLCKSZ * MMNBUFFERS); - size += MAXALIGN(sizeof(*MMCurTop)); - size += MAXALIGN(sizeof(*MMCurRelno)); - size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag)); - - return size; -} - -#endif /* STABLE_MEMORY_STORAGE */ diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 0e33af5f281..09ee4144c50 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.68 2004/01/06 18:07:31 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.69 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,72 +21,52 @@ #include "storage/freespace.h" #include "storage/ipc.h" #include "storage/smgr.h" +#include "utils/hsearch.h" #include "utils/memutils.h" -static void smgrshutdown(int code, Datum arg); - +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to return TRUE on success, FALSE on error. (For + * nblocks and truncate we instead say that returning InvalidBlockNumber + * indicates an error.) + */ typedef struct f_smgr { - int (*smgr_init) (void); /* may be NULL */ - int (*smgr_shutdown) (void); /* may be NULL */ - int (*smgr_create) (Relation reln); - int (*smgr_unlink) (RelFileNode rnode); - int (*smgr_extend) (Relation reln, BlockNumber blocknum, + bool (*smgr_init) (void); /* may be NULL */ + bool (*smgr_shutdown) (void); /* may be NULL */ + bool (*smgr_close) (SMgrRelation reln); + bool (*smgr_create) (SMgrRelation reln, bool isRedo); + bool (*smgr_unlink) (RelFileNode rnode, bool isRedo); + bool (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - int (*smgr_open) (Relation reln); - int (*smgr_close) (Relation reln); - int (*smgr_read) (Relation reln, BlockNumber blocknum, + bool (*smgr_read) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - int (*smgr_write) (Relation reln, BlockNumber blocknum, + bool (*smgr_write) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno, - char *buffer); - BlockNumber (*smgr_nblocks) (Relation reln); - BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks); - int (*smgr_commit) (void); /* may be NULL */ - int (*smgr_abort) (void); /* may be NULL */ - int (*smgr_sync) (void); + BlockNumber (*smgr_nblocks) (SMgrRelation reln); + BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks); + bool (*smgr_commit) (void); /* may be NULL */ + bool (*smgr_abort) (void); /* may be NULL */ + bool (*smgr_sync) (void); /* may be NULL */ } f_smgr; -/* - * The weird placement of commas in this init block is to keep the compiler - * happy, regardless of what storage managers we have (or don't have). - */ - -static f_smgr smgrsw[] = { +static const f_smgr smgrsw[] = { /* magnetic disk */ - {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, - mdread, mdwrite, mdblindwrt, - mdnblocks, mdtruncate, mdcommit, mdabort, mdsync - }, - -#ifdef STABLE_MEMORY_STORAGE - /* main memory */ - {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, - mmread, mmwrite, mmblindwrt, - mmnblocks, NULL, mmcommit, mmabort, NULL}, -#endif + {mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend, + mdread, mdwrite, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync + } }; -/* - * This array records which storage managers are write-once, and which - * support overwrite. A 'true' entry means that the storage manager is - * write-once. In the best of all possible worlds, there would be no - * write-once storage managers. - */ +static const int NSmgr = lengthof(smgrsw); -#ifdef NOT_USED -static bool smgrwo[] = { - false, /* magnetic disk */ -#ifdef STABLE_MEMORY_STORAGE - false, /* main memory */ -#endif -}; -#endif -static int NSmgr = lengthof(smgrsw); +/* + * Each backend has a hashtable that stores all extant SMgrRelation objects. + */ +static HTAB *SMgrRelationHash = NULL; /* * We keep a list of all relations (represented as RelFileNode values) @@ -105,7 +85,7 @@ static int NSmgr = lengthof(smgrsw); typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ - int16 which; /* which storage manager? */ + int which; /* which storage manager? */ bool isTemp; /* is it a temporary relation? */ bool atCommit; /* T=delete at commit; F=delete at abort */ struct PendingRelDelete *next; /* linked-list link */ @@ -114,12 +94,20 @@ typedef struct PendingRelDelete static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ +/* local function prototypes */ +static void smgrshutdown(int code, Datum arg); +static void smgr_internal_unlink(RelFileNode rnode, int which, + bool isTemp, bool isRedo); + + /* * smgrinit(), smgrshutdown() -- Initialize or shut down all storage * managers. * + * Note: in the normal multiprocess scenario with a postmaster, these are + * called at postmaster start and stop, not per-backend. */ -int +void smgrinit(void) { int i; @@ -128,7 +116,7 @@ smgrinit(void) { if (smgrsw[i].smgr_init) { - if ((*(smgrsw[i].smgr_init)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_init)) ()) elog(FATAL, "smgr initialization failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); @@ -137,8 +125,6 @@ smgrinit(void) /* register the shutdown proc */ on_proc_exit(smgrshutdown, 0); - - return SM_SUCCESS; } static void @@ -150,7 +136,7 @@ smgrshutdown(int code, Datum arg) { if (smgrsw[i].smgr_shutdown) { - if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_shutdown)) ()) elog(FATAL, "smgr shutdown failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); @@ -159,57 +145,177 @@ smgrshutdown(int code, Datum arg) } /* + * smgropen() -- Return an SMgrRelation object, creating it if need be. + * + * This does not attempt to actually open the object. + */ +SMgrRelation +smgropen(RelFileNode rnode) +{ + SMgrRelation reln; + bool found; + + if (SMgrRelationHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(RelFileNode); + ctl.entrysize = sizeof(SMgrRelationData); + ctl.hash = tag_hash; + SMgrRelationHash = hash_create("smgr relation table", 400, + &ctl, HASH_ELEM | HASH_FUNCTION); + } + + /* Look up or create an entry */ + reln = (SMgrRelation) hash_search(SMgrRelationHash, + (void *) &rnode, + HASH_ENTER, &found); + if (reln == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Initialize it if not present before */ + if (!found) + { + /* hash_search already filled in the lookup key */ + reln->smgr_which = 0; /* we only have md.c at present */ + reln->md_fd = NULL; /* mark it not open */ + } + + return reln; +} + +/* + * smgrclose() -- Close and delete an SMgrRelation object. + * + * It is the caller's responsibility not to leave any dangling references + * to the object. (Pointers should be cleared after successful return; + * on the off chance of failure, the SMgrRelation object will still exist.) + */ +void +smgrclose(SMgrRelation reln) +{ + if (! (*(smgrsw[reln->smgr_which].smgr_close)) (reln)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); + + if (hash_search(SMgrRelationHash, + (void *) &(reln->smgr_rnode), + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "SMgrRelation hashtable corrupted"); +} + +/* + * smgrcloseall() -- Close all existing SMgrRelation objects. + * + * It is the caller's responsibility not to leave any dangling references. + */ +void +smgrcloseall(void) +{ + HASH_SEQ_STATUS status; + SMgrRelation reln; + + /* Nothing to do if hashtable not set up */ + if (SMgrRelationHash == NULL) + return; + + hash_seq_init(&status, SMgrRelationHash); + + while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) + { + smgrclose(reln); + } +} + +/* + * smgrclosenode() -- Close SMgrRelation object for given RelFileNode, + * if one exists. + * + * This has the same effects as smgrclose(smgropen(rnode)), but it avoids + * uselessly creating a hashtable entry only to drop it again when no + * such entry exists already. + * + * It is the caller's responsibility not to leave any dangling references. + */ +void +smgrclosenode(RelFileNode rnode) +{ + SMgrRelation reln; + + /* Nothing to do if hashtable not set up */ + if (SMgrRelationHash == NULL) + return; + + reln = (SMgrRelation) hash_search(SMgrRelationHash, + (void *) &rnode, + HASH_FIND, NULL); + if (reln != NULL) + smgrclose(reln); +} + +/* * smgrcreate() -- Create a new relation. * - * This routine takes a reldesc, creates the relation on the appropriate - * device, and returns a file descriptor for it. + * Given an already-created (but presumably unused) SMgrRelation, + * cause the underlying disk file or other storage to be created. + * + * If isRedo is true, it is okay for the underlying file to exist + * already because we are in a WAL replay sequence. In this case + * we should make no PendingRelDelete entry; the WAL sequence will + * tell whether to drop the file. */ -int -smgrcreate(int16 which, Relation reln) +void +smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) { - int fd; PendingRelDelete *pending; - if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0) + if (! (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create relation \"%s\": %m", - RelationGetRelationName(reln)))); + errmsg("could not create relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); + + if (isRedo) + return; /* Add the relation to the list of stuff to delete at abort */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); - pending->relnode = reln->rd_node; - pending->which = which; - pending->isTemp = reln->rd_istemp; + pending->relnode = reln->smgr_rnode; + pending->which = reln->smgr_which; + pending->isTemp = isTemp; pending->atCommit = false; /* delete if abort */ pending->next = pendingDeletes; pendingDeletes = pending; - - return fd; } /* - * smgrunlink() -- Unlink a relation. + * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit. + * + * The relation is marked to be removed from the store if we + * successfully commit the current transaction. * - * The relation is removed from the store. Actually, we just remember - * that we want to do this at transaction commit. + * This also implies smgrclose() on the SMgrRelation object. */ -int -smgrunlink(int16 which, Relation reln) +void +smgrscheduleunlink(SMgrRelation reln, bool isTemp) { PendingRelDelete *pending; - /* Make sure the file is closed */ - if (reln->rd_fd >= 0) - smgrclose(which, reln); - /* Add the relation to the list of stuff to delete at commit */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); - pending->relnode = reln->rd_node; - pending->which = which; - pending->isTemp = reln->rd_istemp; + pending->relnode = reln->smgr_rnode; + pending->which = reln->smgr_which; + pending->isTemp = isTemp; pending->atCommit = true; /* delete if commit */ pending->next = pendingDeletes; pendingDeletes = pending; @@ -224,78 +330,83 @@ smgrunlink(int16 which, Relation reln) * immediately, but for now I'll keep the logic simple. */ - return SM_SUCCESS; + /* Now close the file and throw away the hashtable entry */ + smgrclose(reln); } /* - * smgrextend() -- Add a new block to a file. + * smgrdounlink() -- Immediately unlink a relation. * - * The semantics are basically the same as smgrwrite(): write at the - * specified position. However, we are expecting to extend the - * relation (ie, blocknum is the current EOF), and so in case of - * failure we clean up by truncating. + * The relation is removed from the store. This should not be used + * during transactional operations, since it can't be undone. * - * Returns SM_SUCCESS on success; aborts the current transaction on - * failure. + * If isRedo is true, it is okay for the underlying file to be gone + * already. (In practice isRedo will always be true.) + * + * This also implies smgrclose() on the SMgrRelation object. */ -int -smgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer) +void +smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo) { - int status; + RelFileNode rnode = reln->smgr_rnode; + int which = reln->smgr_which; - status = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer); - - if (status == SM_FAIL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not extend relation \"%s\": %m", - RelationGetRelationName(reln)), - errhint("Check free disk space."))); + /* Close the file and throw away the hashtable entry */ + smgrclose(reln); - return status; + smgr_internal_unlink(rnode, which, isTemp, isRedo); } /* - * smgropen() -- Open a relation using a particular storage manager. - * - * Returns the fd for the open relation on success. - * - * On failure, returns -1 if failOK, else aborts the transaction. + * Shared subroutine that actually does the unlink ... */ -int -smgropen(int16 which, Relation reln, bool failOK) +static void +smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) { - int fd; - - if (reln->rd_rel->relkind == RELKIND_VIEW) - return -1; - if (reln->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) - return -1; - if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0) - if (!failOK) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", - RelationGetRelationName(reln)))); - - return fd; + /* + * Get rid of any leftover buffers for the rel (shouldn't be any in the + * commit case, but there can be in the abort case). + */ + DropRelFileNodeBuffers(rnode, isTemp); + + /* + * Tell the free space map to forget this relation. It won't be accessed + * any more anyway, but we may as well recycle the map space quickly. + */ + FreeSpaceMapForgetRel(&rnode); + + /* + * And delete the physical files. + * + * Note: we treat deletion failure as a WARNING, not an error, + * because we've already decided to commit or abort the current xact. + */ + if (! (*(smgrsw[which].smgr_unlink)) (rnode, isRedo)) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not unlink relation %u/%u: %m", + rnode.tblNode, + rnode.relNode))); } /* - * smgrclose() -- Close a relation. + * smgrextend() -- Add a new block to a file. * - * Returns SM_SUCCESS on success, aborts on failure. + * The semantics are basically the same as smgrwrite(): write at the + * specified position. However, we are expecting to extend the + * relation (ie, blocknum is the current EOF), and so in case of + * failure we clean up by truncating. */ -int -smgrclose(int16 which, Relation reln) +void +smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL) + if (! (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close relation \"%s\": %m", - RelationGetRelationName(reln)))); - - return SM_SUCCESS; + errmsg("could not extend relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode), + errhint("Check free disk space."))); } /* @@ -304,24 +415,18 @@ smgrclose(int16 which, Relation reln) * * This routine is called from the buffer manager in order to * instantiate pages in the shared buffer cache. All storage managers - * return pages in the format that POSTGRES expects. This routine - * dispatches the read. On success, it returns SM_SUCCESS. On failure, - * the current transaction is aborted. + * return pages in the format that POSTGRES expects. */ -int -smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer) +void +smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - int status; - - status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer); - - if (status == SM_FAIL) + if (! (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read block %d of relation \"%s\": %m", - blocknum, RelationGetRelationName(reln)))); - - return status; + errmsg("could not read block %u of relation %u/%u: %m", + blocknum, + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); } /* @@ -329,56 +434,17 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer) * * This is not a synchronous write -- the block is not necessarily * on disk at return, only dumped out to the kernel. - * - * The buffer is written out via the appropriate - * storage manager. This routine returns SM_SUCCESS or aborts - * the current transaction. */ -int -smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer) -{ - int status; - - status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer); - - if (status == SM_FAIL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write block %d of relation \"%s\": %m", - blocknum, RelationGetRelationName(reln)))); - - return status; -} - -/* - * smgrblindwrt() -- Write a page out blind. - * - * In some cases, we may find a page in the buffer cache that we - * can't make a reldesc for. This happens, for example, when we - * want to reuse a dirty page that was written by a transaction - * that has not yet committed, which created a new relation. In - * this case, the buffer manager will call smgrblindwrt() with - * the name and OID of the database and the relation to which the - * buffer belongs. Every storage manager must be able to write - * this page out to stable storage in this circumstance. - */ -int -smgrblindwrt(int16 which, - RelFileNode rnode, - BlockNumber blkno, - char *buffer) +void +smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - int status; - - status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer); - - if (status == SM_FAIL) + if (! (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write block %d of %u/%u blind: %m", - blkno, rnode.tblNode, rnode.relNode))); - - return status; + errmsg("could not write block %u of relation %u/%u: %m", + blocknum, + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); } /* @@ -389,11 +455,11 @@ smgrblindwrt(int16 which, * transaction on failure. */ BlockNumber -smgrnblocks(int16 which, Relation reln) +smgrnblocks(SMgrRelation reln) { BlockNumber nblocks; - nblocks = (*(smgrsw[which].smgr_nblocks)) (reln); + nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln); /* * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would @@ -404,8 +470,9 @@ smgrnblocks(int16 which, Relation reln) if (nblocks == InvalidBlockNumber) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not count blocks of relation \"%s\": %m", - RelationGetRelationName(reln)))); + errmsg("could not count blocks of relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); return nblocks; } @@ -418,27 +485,25 @@ smgrnblocks(int16 which, Relation reln) * transaction on failure. */ BlockNumber -smgrtruncate(int16 which, Relation reln, BlockNumber nblocks) +smgrtruncate(SMgrRelation reln, BlockNumber nblocks) { BlockNumber newblks; - newblks = nblocks; - if (smgrsw[which].smgr_truncate) - { - /* - * Tell the free space map to forget anything it may have stored - * for the about-to-be-deleted blocks. We want to be sure it - * won't return bogus block numbers later on. - */ - FreeSpaceMapTruncateRel(&reln->rd_node, nblocks); - - newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks); - if (newblks == InvalidBlockNumber) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not truncate relation \"%s\" to %u blocks: %m", - RelationGetRelationName(reln), nblocks))); - } + /* + * Tell the free space map to forget anything it may have stored + * for the about-to-be-deleted blocks. We want to be sure it + * won't return bogus block numbers later on. + */ + FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks); + + newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks); + if (newblks == InvalidBlockNumber) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate relation %u/%u to %u blocks: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode, + nblocks))); return newblks; } @@ -446,7 +511,7 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks) /* * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact. */ -int +void smgrDoPendingDeletes(bool isCommit) { while (pendingDeletes != NULL) @@ -455,39 +520,12 @@ smgrDoPendingDeletes(bool isCommit) pendingDeletes = pending->next; if (pending->atCommit == isCommit) - { - /* - * Get rid of any leftover buffers for the rel (shouldn't be - * any in the commit case, but there can be in the abort - * case). - */ - DropRelFileNodeBuffers(pending->relnode, pending->isTemp); - - /* - * Tell the free space map to forget this relation. It won't - * be accessed any more anyway, but we may as well recycle the - * map space quickly. - */ - FreeSpaceMapForgetRel(&pending->relnode); - - /* - * And delete the physical files. - * - * Note: we treat deletion failure as a WARNING, not an error, - * because we've already decided to commit or abort the - * current xact. - */ - if ((*(smgrsw[pending->which].smgr_unlink)) (pending->relnode) == SM_FAIL) - ereport(WARNING, - (errcode_for_file_access(), - errmsg("could not unlink %u/%u: %m", - pending->relnode.tblNode, - pending->relnode.relNode))); - } + smgr_internal_unlink(pending->relnode, + pending->which, + pending->isTemp, + false); pfree(pending); } - - return SM_SUCCESS; } /* @@ -496,7 +534,7 @@ smgrDoPendingDeletes(bool isCommit) * * This is called before we actually commit. */ -int +void smgrcommit(void) { int i; @@ -505,20 +543,18 @@ smgrcommit(void) { if (smgrsw[i].smgr_commit) { - if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_commit)) ()) elog(FATAL, "transaction commit failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } - - return SM_SUCCESS; } /* * smgrabort() -- Abort changes made during the current transaction. */ -int +void smgrabort(void) { int i; @@ -527,20 +563,18 @@ smgrabort(void) { if (smgrsw[i].smgr_abort) { - if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_abort)) ()) elog(FATAL, "transaction abort failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } - - return SM_SUCCESS; } /* * smgrsync() -- Sync files to disk at checkpoint time. */ -int +void smgrsync(void) { int i; @@ -549,26 +583,14 @@ smgrsync(void) { if (smgrsw[i].smgr_sync) { - if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_sync)) ()) elog(PANIC, "storage sync failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } - - return SM_SUCCESS; } -#ifdef NOT_USED -bool -smgriswo(int16 smgrno) -{ - if (smgrno < 0 || smgrno >= NSmgr) - elog(ERROR, "invalid storage manager id: %d", smgrno); - - return smgrwo[smgrno]; -} -#endif void smgr_redo(XLogRecPtr lsn, XLogRecord *record) diff --git a/src/backend/storage/smgr/smgrtype.c b/src/backend/storage/smgr/smgrtype.c index 10e08452418..60cc305bd1b 100644 --- a/src/backend/storage/smgr/smgrtype.c +++ b/src/backend/storage/smgr/smgrtype.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.22 2003/11/29 19:51:57 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.23 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,26 +16,21 @@ #include "storage/smgr.h" + typedef struct smgrid { - char *smgr_name; + const char *smgr_name; } smgrid; /* * StorageManager[] -- List of defined storage managers. - * - * The weird comma placement is to keep compilers happy no matter - * which of these is (or is not) defined. */ - -static smgrid StorageManager[] = { - {"magnetic disk"}, -#ifdef STABLE_MEMORY_STORAGE - {"main memory"} -#endif +static const smgrid StorageManager[] = { + {"magnetic disk"} }; -static int NStorageManagers = lengthof(StorageManager); +static const int NStorageManagers = lengthof(StorageManager); + Datum smgrin(PG_FUNCTION_ARGS) diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 72c93101106..8a23fcc70ef 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.209 2003/11/29 19:51:57 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.210 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,6 +48,7 @@ #include "parser/parse_type.h" #include "rewrite/rewriteDefine.h" #include "rewrite/rewriteRemove.h" +#include "storage/fd.h" #include "tcop/pquery.h" #include "tcop/utility.h" #include "utils/acl.h" diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 90577cb6e40..3364322dd5b 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -74,7 +74,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.59 2003/11/29 19:52:00 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.60 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,6 +83,7 @@ #include "catalog/catalog.h" #include "miscadmin.h" #include "storage/sinval.h" +#include "storage/smgr.h" #include "utils/catcache.h" #include "utils/inval.h" #include "utils/memutils.h" @@ -298,19 +299,22 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr, */ static void AddRelcacheInvalidationMessage(InvalidationListHeader *hdr, - Oid dbId, Oid relId) + Oid dbId, Oid relId, RelFileNode physId) { SharedInvalidationMessage msg; /* Don't add a duplicate item */ - /* We assume comparing relId is sufficient, needn't check dbId */ + /* We assume dbId need not be checked because it will never change */ + /* relfilenode fields must be checked to support reassignment */ ProcessMessageList(hdr->rclist, - if (msg->rc.relId == relId) return); + if (msg->rc.relId == relId && + RelFileNodeEquals(msg->rc.physId, physId)) return); /* OK, add the item */ msg.rc.id = SHAREDINVALRELCACHE_ID; msg.rc.dbId = dbId; msg.rc.relId = relId; + msg.rc.physId = physId; AddInvalidationMessage(&hdr->rclist, &msg); } @@ -391,10 +395,10 @@ RegisterCatcacheInvalidation(int cacheId, * As above, but register a relcache invalidation event. */ static void -RegisterRelcacheInvalidation(Oid dbId, Oid relId) +RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId) { AddRelcacheInvalidationMessage(&CurrentCmdInvalidMsgs, - dbId, relId); + dbId, relId, physId); /* * If the relation being invalidated is one of those cached in the @@ -435,9 +439,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) } else if (msg->id == SHAREDINVALRELCACHE_ID) { - if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == 0) + /* + * If the message includes a valid relfilenode, we must ensure that + * smgr cache entry gets zapped. The relcache will handle this if + * called, otherwise we must do it directly. + */ + if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid) { - RelationIdInvalidateRelationCacheByRelationId(msg->rc.relId); + if (OidIsValid(msg->rc.physId.relNode)) + RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId); + else + RelationCacheInvalidateEntry(msg->rc.relId, NULL); for (i = 0; i < cache_callback_count; i++) { @@ -447,6 +459,12 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) (*ccitem->function) (ccitem->arg, msg->rc.relId); } } + else + { + /* might have smgr entry even if not in our database */ + if (OidIsValid(msg->rc.physId.relNode)) + smgrclosenode(msg->rc.physId); + } } else elog(FATAL, "unrecognized SI message id: %d", msg->id); @@ -456,7 +474,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) * InvalidateSystemCaches * * This blows away all tuples in the system catalog caches and - * all the cached relation descriptors (and closes their files too). + * all the cached relation descriptors and smgr cache entries. * Relation descriptors that have positive refcounts are then rebuilt. * * We call this when we see a shared-inval-queue overflow signal, @@ -469,7 +487,7 @@ InvalidateSystemCaches(void) int i; ResetCatalogCaches(); - RelationCacheInvalidate(); + RelationCacheInvalidate(); /* gets smgr cache too */ for (i = 0; i < cache_callback_count; i++) { @@ -488,11 +506,15 @@ static void PrepareForTupleInvalidation(Relation relation, HeapTuple tuple, void (*CacheIdRegisterFunc) (int, uint32, ItemPointer, Oid), - void (*RelationIdRegisterFunc) (Oid, Oid)) + void (*RelationIdRegisterFunc) (Oid, Oid, + RelFileNode)) { Oid tupleRelId; + Oid databaseId; Oid relationId; + RelFileNode rnode; + /* Do nothing during bootstrap */ if (IsBootstrapProcessingMode()) return; @@ -524,24 +546,49 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple, tupleRelId = RelationGetRelid(relation); if (tupleRelId == RelOid_pg_class) + { + Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple); + relationId = HeapTupleGetOid(tuple); + if (classtup->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + rnode.tblNode = databaseId; /* XXX change for tablespaces */ + rnode.relNode = classtup->relfilenode; + /* + * Note: during a pg_class row update that assigns a new relfilenode + * value, we will be called on both the old and new tuples, and thus + * will broadcast invalidation messages showing both the old and new + * relfilenode values. This ensures that other backends will close + * smgr references to the old relfilenode file. + */ + } else if (tupleRelId == RelOid_pg_attribute) - relationId = ((Form_pg_attribute) GETSTRUCT(tuple))->attrelid; + { + Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple); + + relationId = atttup->attrelid; + /* + * KLUGE ALERT: we always send the relcache event with MyDatabaseId, + * even if the rel in question is shared (which we can't easily tell). + * This essentially means that only backends in this same database + * will react to the relcache flush request. This is in fact + * appropriate, since only those backends could see our pg_attribute + * change anyway. It looks a bit ugly though. + */ + databaseId = MyDatabaseId; + /* We assume no smgr cache flush is needed, either */ + rnode.tblNode = InvalidOid; + rnode.relNode = InvalidOid; + } else return; /* - * Yes. We need to register a relcache invalidation event for the - * relation identified by relationId. - * - * KLUGE ALERT: we always send the relcache event with MyDatabaseId, even - * if the rel in question is shared. This essentially means that only - * backends in this same database will react to the relcache flush - * request. This is in fact appropriate, since only those backends - * could see our pg_class or pg_attribute change anyway. It looks a - * bit ugly though. + * Yes. We need to register a relcache invalidation event. */ - (*RelationIdRegisterFunc) (MyDatabaseId, relationId); + (*RelationIdRegisterFunc) (databaseId, relationId, rnode); } @@ -660,7 +707,7 @@ CommandEndInvalidationMessages(bool isCommit) /* * CacheInvalidateHeapTuple * Register the given tuple for invalidation at end of command - * (ie, current command is outdating this tuple). + * (ie, current command is creating or outdating this tuple). */ void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple) @@ -678,12 +725,44 @@ CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple) * This is used in places that need to force relcache rebuild but aren't * changing any of the tuples recognized as contributors to the relcache * entry by PrepareForTupleInvalidation. (An example is dropping an index.) + * We assume in particular that relfilenode isn't changing. */ void -CacheInvalidateRelcache(Oid relationId) +CacheInvalidateRelcache(Relation relation) { - /* See KLUGE ALERT in PrepareForTupleInvalidation */ - RegisterRelcacheInvalidation(MyDatabaseId, relationId); + Oid databaseId; + Oid relationId; + + relationId = RelationGetRelid(relation); + if (relation->rd_rel->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + + RegisterRelcacheInvalidation(databaseId, relationId, relation->rd_node); +} + +/* + * CacheInvalidateRelcacheByTuple + * As above, but relation is identified by passing its pg_class tuple. + */ +void +CacheInvalidateRelcacheByTuple(HeapTuple classTuple) +{ + Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple); + Oid databaseId; + Oid relationId; + RelFileNode rnode; + + relationId = HeapTupleGetOid(classTuple); + if (classtup->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + rnode.tblNode = databaseId; /* XXX change for tablespaces */ + rnode.relNode = classtup->relfilenode; + + RegisterRelcacheInvalidation(databaseId, relationId, rnode); } /* diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 37b81f1244f..8561cff549a 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.196 2004/02/02 00:17:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.197 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,6 +54,7 @@ #include "optimizer/clauses.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" +#include "storage/fd.h" #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/catcache.h" @@ -92,13 +93,6 @@ static HTAB *RelationIdCache; static HTAB *RelationSysNameCache; /* - * Bufmgr uses RelFileNode for lookup. Actually, I would like to do - * not pass Relation to bufmgr & beyond at all and keep some cache - * in smgr, but no time to do it right way now. -- vadim 10/22/2000 - */ -static HTAB *RelationNodeCache; - -/* * This flag is false until we have prepared the critical relcache entries * that are needed to do indexscans on the tables read by relcache building. */ @@ -152,18 +146,12 @@ typedef struct relnamecacheent Relation reldesc; } RelNameCacheEnt; -typedef struct relnodecacheent -{ - RelFileNode relnode; - Relation reldesc; -} RelNodeCacheEnt; - /* * macros to manipulate the lookup hashtables */ #define RelationCacheInsert(RELATION) \ do { \ - RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; bool found; \ + RelIdCacheEnt *idhentry; bool found; \ idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \ (void *) &(RELATION->rd_id), \ HASH_ENTER, \ @@ -174,16 +162,6 @@ do { \ errmsg("out of memory"))); \ /* used to give notice if found -- now just keep quiet */ \ idhentry->reldesc = RELATION; \ - nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \ - (void *) &(RELATION->rd_node), \ - HASH_ENTER, \ - &found); \ - if (nodentry == NULL) \ - ereport(ERROR, \ - (errcode(ERRCODE_OUT_OF_MEMORY), \ - errmsg("out of memory"))); \ - /* used to give notice if found -- now just keep quiet */ \ - nodentry->reldesc = RELATION; \ if (IsSystemNamespace(RelationGetNamespace(RELATION))) \ { \ char *relname = RelationGetRelationName(RELATION); \ @@ -223,30 +201,14 @@ do { \ RELATION = NULL; \ } while(0) -#define RelationNodeCacheLookup(NODE, RELATION) \ -do { \ - RelNodeCacheEnt *hentry; \ - hentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \ - (void *)&(NODE), HASH_FIND,NULL); \ - if (hentry) \ - RELATION = hentry->reldesc; \ - else \ - RELATION = NULL; \ -} while(0) - #define RelationCacheDelete(RELATION) \ do { \ - RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; \ + RelIdCacheEnt *idhentry; \ idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \ (void *)&(RELATION->rd_id), \ HASH_REMOVE, NULL); \ if (idhentry == NULL) \ elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \ - nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \ - (void *)&(RELATION->rd_node), \ - HASH_REMOVE, NULL); \ - if (nodentry == NULL) \ - elog(WARNING, "trying to delete a rd_node reldesc that does not exist"); \ if (IsSystemNamespace(RelationGetNamespace(RELATION))) \ { \ char *relname = RelationGetRelationName(RELATION); \ @@ -423,7 +385,7 @@ AllocateRelationDesc(Relation relation, Form_pg_class relp) relation->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ - relation->rd_fd = -1; + relation->rd_smgr = NULL; /* * Copy the relation tuple form @@ -914,7 +876,7 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo, relation->rd_node.relNode = relation->rd_rel->relfilenode; /* make sure relation is marked as having no open file yet */ - relation->rd_fd = -1; + relation->rd_smgr = NULL; /* * Insert newly created relation into relcache hash tables. @@ -1303,7 +1265,7 @@ formrdesc(const char *relationName, relation->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ - relation->rd_fd = -1; + relation->rd_smgr = NULL; /* * initialize reference count @@ -1482,30 +1444,6 @@ RelationSysNameCacheGetRelation(const char *relationName) } /* - * RelationNodeCacheGetRelation - * - * As above, but lookup by relfilenode. - * - * NOTE: this must NOT try to revalidate invalidated nailed indexes, since - * that could cause us to return an entry with a different relfilenode than - * the caller asked for. Currently this is used only by the buffer manager. - * Really the bufmgr's idea of relations should be separated out from the - * relcache ... - */ -Relation -RelationNodeCacheGetRelation(RelFileNode rnode) -{ - Relation rd; - - RelationNodeCacheLookup(rnode, rd); - - if (RelationIsValid(rd)) - RelationIncrementReferenceCount(rd); - - return rd; -} - -/* * RelationIdGetRelation * * Lookup a reldesc by OID; make one if not already in cache. @@ -1635,14 +1573,8 @@ RelationReloadClassinfo(Relation relation) elog(ERROR, "could not find tuple for system relation %u", relation->rd_id); relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); - if (relation->rd_node.relNode != relp->relfilenode) - { - /* We have to re-insert the entry into the relcache indexes */ - RelationCacheDelete(relation); - memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE); - relation->rd_node.relNode = relp->relfilenode; - RelationCacheInsert(relation); - } + memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE); + relation->rd_node.relNode = relp->relfilenode; heap_freetuple(pg_class_tuple); /* Must adjust number of blocks after we know the new relfilenode */ relation->rd_targblock = InvalidBlockNumber; @@ -1672,10 +1604,10 @@ RelationClearRelation(Relation relation, bool rebuild) * ensures that the low-level file access state is updated after, say, * a vacuum truncation. */ - if (relation->rd_fd >= 0) + if (relation->rd_smgr) { - smgrclose(DEFAULT_SMGR, relation); - relation->rd_fd = -1; + smgrclose(relation->rd_smgr); + relation->rd_smgr = NULL; } /* @@ -1866,18 +1798,31 @@ RelationForgetRelation(Oid rid) } /* - * RelationIdInvalidateRelationCacheByRelationId + * RelationCacheInvalidateEntry * * This routine is invoked for SI cache flush messages. * - * We used to skip local relations, on the grounds that they could - * not be targets of cross-backend SI update messages; but it seems - * safer to process them, so that our *own* SI update messages will - * have the same effects during CommandCounterIncrement for both - * local and nonlocal relations. + * Any relcache entry matching the relid must be flushed. (Note: caller has + * already determined that the relid belongs to our database or is a shared + * relation.) If rnode isn't NULL, we must also ensure that any smgr cache + * entry matching that rnode is flushed. + * + * Ordinarily, if rnode is supplied then it will match the relfilenode of + * the target relid. However, it's possible for rnode to be different if + * someone is engaged in a relfilenode change. In that case we want to + * make sure we clear the right cache entries. This has to be done here + * to keep things in sync between relcache and smgr cache --- we can't have + * someone flushing an smgr cache entry that a relcache entry still points + * to. + * + * We used to skip local relations, on the grounds that they could + * not be targets of cross-backend SI update messages; but it seems + * safer to process them, so that our *own* SI update messages will + * have the same effects during CommandCounterIncrement for both + * local and nonlocal relations. */ void -RelationIdInvalidateRelationCacheByRelationId(Oid relationId) +RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode) { Relation relation; @@ -1886,14 +1831,27 @@ RelationIdInvalidateRelationCacheByRelationId(Oid relationId) if (PointerIsValid(relation)) { relcacheInvalsReceived++; + if (rnode) + { + /* Need to be sure smgr is flushed, but don't do it twice */ + if (relation->rd_smgr == NULL || + !RelFileNodeEquals(*rnode, relation->rd_node)) + smgrclosenode(*rnode); + } RelationFlushRelation(relation); } + else + { + if (rnode) + smgrclosenode(*rnode); + } } /* * RelationCacheInvalidate * Blow away cached relation descriptors that have zero reference counts, - * and rebuild those with positive reference counts. + * and rebuild those with positive reference counts. Also reset the smgr + * relation cache. * * This is currently used only to recover from SI message buffer overflow, * so we do not touch new-in-transaction relations; they cannot be targets @@ -1934,6 +1892,13 @@ RelationCacheInvalidate(void) { relation = idhentry->reldesc; + /* Must close all smgr references to avoid leaving dangling ptrs */ + if (relation->rd_smgr) + { + smgrclose(relation->rd_smgr); + relation->rd_smgr = NULL; + } + /* Ignore new relations, since they are never SI targets */ if (relation->rd_isnew) continue; @@ -1970,6 +1935,13 @@ RelationCacheInvalidate(void) rebuildList = nconc(rebuildFirstList, rebuildList); + /* + * Now zap any remaining smgr cache entries. This must happen before + * we start to rebuild entries, since that may involve catalog fetches + * which will re-open catalog files. + */ + smgrcloseall(); + /* Phase 2: rebuild the items found to need rebuild in phase 1 */ foreach(l, rebuildList) { @@ -2107,7 +2079,7 @@ RelationBuildLocalRelation(const char *relname, rel->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ - rel->rd_fd = -1; + rel->rd_smgr = NULL; RelationSetReferenceCount(rel, 1); @@ -2233,12 +2205,6 @@ RelationCacheInitialize(void) RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE, &ctl, HASH_ELEM | HASH_FUNCTION); - ctl.keysize = sizeof(RelFileNode); - ctl.entrysize = sizeof(RelNodeCacheEnt); - ctl.hash = tag_hash; - RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE, - &ctl, HASH_ELEM | HASH_FUNCTION); - /* * Try to load the relcache cache file. If successful, we're done for * now. Otherwise, initialize the cache with pre-made descriptors for @@ -2406,65 +2372,6 @@ RelationCacheInitializePhase3(void) } } - -/* used by XLogInitCache */ -void CreateDummyCaches(void); -void DestroyDummyCaches(void); - -void -CreateDummyCaches(void) -{ - MemoryContext oldcxt; - HASHCTL ctl; - - if (!CacheMemoryContext) - CreateCacheMemoryContext(); - - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(NameData); - ctl.entrysize = sizeof(RelNameCacheEnt); - RelationSysNameCache = hash_create("Relcache by name", INITRELCACHESIZE, - &ctl, HASH_ELEM); - - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(RelIdCacheEnt); - ctl.hash = tag_hash; - RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE, - &ctl, HASH_ELEM | HASH_FUNCTION); - - ctl.keysize = sizeof(RelFileNode); - ctl.entrysize = sizeof(RelNodeCacheEnt); - ctl.hash = tag_hash; - RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE, - &ctl, HASH_ELEM | HASH_FUNCTION); - - MemoryContextSwitchTo(oldcxt); -} - -void -DestroyDummyCaches(void) -{ - MemoryContext oldcxt; - - if (!CacheMemoryContext) - return; - - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - - if (RelationIdCache) - hash_destroy(RelationIdCache); - if (RelationSysNameCache) - hash_destroy(RelationSysNameCache); - if (RelationNodeCache) - hash_destroy(RelationNodeCache); - - RelationIdCache = RelationSysNameCache = RelationNodeCache = NULL; - - MemoryContextSwitchTo(oldcxt); -} - static void AttrDefaultFetch(Relation relation) { @@ -3125,7 +3032,7 @@ load_relcache_init_file(void) /* * Reset transient-state fields in the relcache entry */ - rel->rd_fd = -1; + rel->rd_smgr = NULL; rel->rd_targblock = InvalidBlockNumber; if (rel->rd_isnailed) RelationSetReferenceCount(rel, 1); diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index e93dcb8a84c..075269b4ad0 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.122 2004/02/08 22:28:57 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.123 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "catalog/pg_shadow.h" #include "libpq/libpq-be.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" #include "utils/builtins.h" diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index af113eb66a0..226c5c2f99c 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.30 2003/11/29 22:40:58 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.31 2004/02/10 01:55:26 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -72,15 +72,6 @@ typedef FormData_pg_database *Form_pg_database; DATA(insert OID = 1 ( template1 PGUID ENCODING t t 0 0 0 "" _null_ _null_ )); DESCR("Default template database"); - #define TemplateDbOid 1 -/* Just to mark OID as used for unused_oid script -:) */ -#define DATAMARKOID(x) - -DATAMARKOID(= 2) -#define RecoveryDb 2 - -#undef DATAMARKOID - #endif /* PG_DATABASE_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index a0b523da3d4..84706272dec 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.32 2003/11/29 22:41:13 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.33 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "storage/backendid.h" #include "storage/itemptr.h" +#include "storage/relfilenode.h" /* @@ -27,6 +28,13 @@ * ID field). -1 means a relcache inval message. Other negative values * are available to identify other inval message types. * + * Relcache invalidation messages usually also cause invalidation of entries + * in the smgr's relation cache. This means they must carry both logical + * and physical relation ID info (ie, both dbOID/relOID and RelFileNode). + * In some cases RelFileNode information is not available so the sender fills + * those fields with zeroes --- this is okay so long as no smgr cache flush + * is required. + * * Shared-inval events are initially driven by detecting tuple inserts, * updates and deletions in system catalogs (see CacheInvalidateHeapTuple). * An update generates two inval events, one for the old tuple and one for @@ -63,6 +71,12 @@ typedef struct int16 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared relation */ Oid relId; /* relation ID */ + RelFileNode physId; /* physical file ID */ + /* + * Note: it is likely that RelFileNode will someday be changed to + * include database ID. In that case the dbId field will be redundant + * and should be removed to save space. + */ } SharedInvalRelcacheMsg; typedef union diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 0fd20fd436f..738e436fb7d 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.39 2003/11/29 22:41:13 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.40 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,36 +16,54 @@ #include "access/xlog.h" #include "fmgr.h" -#include "storage/relfilenode.h" #include "storage/block.h" -#include "utils/rel.h" +#include "storage/relfilenode.h" + + +/* + * smgr.c maintains a table of SMgrRelation objects, which are essentially + * cached file handles. An SMgrRelation is created (if not already present) + * by smgropen(), and destroyed by smgrclose(). Note that neither of these + * operations imply I/O, they just create or destroy a hashtable entry. + * (But smgrclose() may release associated resources, such as OS-level file + * descriptors.) + */ +typedef struct SMgrRelationData +{ + /* rnode is the hashtable lookup key, so it must be first! */ + RelFileNode smgr_rnode; /* relation physical identifier */ + /* additional public fields may someday exist here */ -#define SM_FAIL 0 -#define SM_SUCCESS 1 + /* + * Fields below here are intended to be private to smgr.c and its + * submodules. Do not touch them from elsewhere. + */ + int smgr_which; /* storage manager selector */ -#define DEFAULT_SMGR 0 + struct _MdfdVec *md_fd; /* for md.c; NULL if not open */ +} SMgrRelationData; -extern int smgrinit(void); -extern int smgrcreate(int16 which, Relation reln); -extern int smgrunlink(int16 which, Relation reln); -extern int smgrextend(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); -extern int smgropen(int16 which, Relation reln, bool failOK); -extern int smgrclose(int16 which, Relation reln); -extern int smgrread(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); -extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); -extern int smgrblindwrt(int16 which, RelFileNode rnode, - BlockNumber blkno, char *buffer); -extern BlockNumber smgrnblocks(int16 which, Relation reln); -extern BlockNumber smgrtruncate(int16 which, Relation reln, - BlockNumber nblocks); -extern int smgrDoPendingDeletes(bool isCommit); -extern int smgrcommit(void); -extern int smgrabort(void); -extern int smgrsync(void); +typedef SMgrRelationData *SMgrRelation; + + +extern void smgrinit(void); +extern SMgrRelation smgropen(RelFileNode rnode); +extern void smgrclose(SMgrRelation reln); +extern void smgrcloseall(void); +extern void smgrclosenode(RelFileNode rnode); +extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo); +extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp); +extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo); +extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern BlockNumber smgrnblocks(SMgrRelation reln); +extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks); +extern void smgrDoPendingDeletes(bool isCommit); +extern void smgrcommit(void); +extern void smgrabort(void); +extern void smgrsync(void); extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record); extern void smgr_undo(XLogRecPtr lsn, XLogRecord *record); @@ -55,38 +73,18 @@ extern void smgr_desc(char *buf, uint8 xl_info, char *rec); /* internals: move me elsewhere -- ay 7/94 */ /* in md.c */ -extern int mdinit(void); -extern int mdcreate(Relation reln); -extern int mdunlink(RelFileNode rnode); -extern int mdextend(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdopen(Relation reln); -extern int mdclose(Relation reln); -extern int mdread(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer); -extern BlockNumber mdnblocks(Relation reln); -extern BlockNumber mdtruncate(Relation reln, BlockNumber nblocks); -extern int mdcommit(void); -extern int mdabort(void); -extern int mdsync(void); - -/* mm.c */ -extern int mminit(void); -extern int mmcreate(Relation reln); -extern int mmunlink(RelFileNode rnode); -extern int mmextend(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmopen(Relation reln); -extern int mmclose(Relation reln); -extern int mmread(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer); -extern BlockNumber mmnblocks(Relation reln); -extern BlockNumber mmtruncate(Relation reln, BlockNumber nblocks); -extern int mmcommit(void); -extern int mmabort(void); - -extern int mmshutdown(void); -extern int MMShmemSize(void); +extern bool mdinit(void); +extern bool mdclose(SMgrRelation reln); +extern bool mdcreate(SMgrRelation reln, bool isRedo); +extern bool mdunlink(RelFileNode rnode, bool isRedo); +extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern BlockNumber mdnblocks(SMgrRelation reln); +extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks); +extern bool mdcommit(void); +extern bool mdabort(void); +extern bool mdsync(void); /* smgrtype.c */ extern Datum smgrout(PG_FUNCTION_ARGS); diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 467d15ee839..e7052726f27 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.29 2003/11/29 22:41:15 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.30 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,7 +28,9 @@ extern void CommandEndInvalidationMessages(bool isCommit); extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple); -extern void CacheInvalidateRelcache(Oid relationId); +extern void CacheInvalidateRelcache(Relation relation); + +extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple); extern void CacheRegisterSyscacheCallback(int cacheid, CacheCallbackFunction func, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index dfdb8491e3c..8532c5a737a 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.72 2004/01/06 18:07:32 neilc Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.73 2004/02/10 01:55:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,7 +20,6 @@ #include "catalog/pg_index.h" #include "rewrite/prs2lock.h" #include "storage/block.h" -#include "storage/fd.h" #include "storage/relfilenode.h" @@ -98,16 +97,16 @@ typedef struct PgStat_Info bool index_scan_counted; } PgStat_Info; + /* * Here are the contents of a relation cache entry. */ typedef struct RelationData { - File rd_fd; /* open file descriptor, or -1 if - * none; this is NOT an operating - * system file descriptor */ - RelFileNode rd_node; /* file node (physical identifier) */ + RelFileNode rd_node; /* relation physical identifier */ + /* use "struct" here to avoid needing to include smgr.h: */ + struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */ BlockNumber rd_nblocks; /* number of blocks in rel */ BlockNumber rd_targblock; /* current insertion target block, or * InvalidBlockNumber */ @@ -227,14 +226,6 @@ typedef Relation *RelationPtr; #define RelationGetRelid(relation) ((relation)->rd_id) /* - * RelationGetFile - * Returns the open file descriptor for the rel, or -1 if - * none. This is NOT an operating system file descriptor; see md.c - * for more information - */ -#define RelationGetFile(relation) ((relation)->rd_fd) - -/* * RelationGetNumberOfAttributes * Returns the number of attributes in a relation. */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index c7c6a9231f5..848d68b2077 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.38 2003/11/29 22:41:16 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.39 2004/02/10 01:55:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,7 +24,6 @@ extern Relation RelationSysNameGetRelation(const char *relationName); /* finds an existing cache entry, but won't make a new one */ extern Relation RelationIdCacheGetRelation(Oid relationId); -extern Relation RelationNodeCacheGetRelation(RelFileNode rnode); extern void RelationClose(Relation relation); @@ -61,7 +60,7 @@ extern Relation RelationBuildLocalRelation(const char *relname, */ extern void RelationForgetRelation(Oid rid); -extern void RelationIdInvalidateRelationCacheByRelationId(Oid relationId); +extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode); extern void RelationCacheInvalidate(void); @@ -73,11 +72,6 @@ extern void AtEOXact_RelationCache(bool commit); extern bool RelationIdIsInInitFile(Oid relationId); extern void RelationCacheInitFileInvalidate(bool beforeSend); -/* XLOG support */ -extern void CreateDummyCaches(void); -extern void DestroyDummyCaches(void); - - /* should be used only by relcache.c and catcache.c */ extern bool criticalRelcachesBuilt; |