summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tom Lane 2007-06-07 19:19:57 +0000
committer Tom Lane 2007-06-07 19:19:57 +0000
commit 3dae420f7b9ec0cc748bd2b252e885303e327841 (patch)
tree d8d7cb3ba18653c835d5595d094d053922c4d2fe
parent 75cf223f2aaaead6d7ddb0a98fd6d69c7182b1a4 (diff)
Rework temp_tablespaces patch so that temp tablespaces are assigned separately
for each temp file, rather than once per sort or hashjoin; this allows spreading the data of a large sort or join across multiple tablespaces. (I remain dubious that this will make any difference in practice, but certain people insisted.) Arrange to cache the results of parsing the GUC variable instead of recomputing from scratch on every demand, and push usage of the cache down to the bottommost fd.c level.
-rw-r--r-- doc/src/sgml/config.sgml 20
-rw-r--r-- src/backend/commands/tablespace.c 227
-rw-r--r-- src/backend/executor/nodeHash.c 15
-rw-r--r-- src/backend/executor/nodeHashjoin.c 8
-rw-r--r-- src/backend/storage/file/buffile.c 16
-rw-r--r-- src/backend/storage/file/fd.c 97
-rw-r--r-- src/backend/utils/sort/logtape.c 3
-rw-r--r-- src/backend/utils/sort/tuplesort.c 7
-rw-r--r-- src/backend/utils/sort/tuplestore.c 12
-rw-r--r-- src/include/commands/tablespace.h 3
-rw-r--r-- src/include/executor/hashjoin.h 2
-rw-r--r-- src/include/executor/nodeHashjoin.h 5
-rw-r--r-- src/include/storage/buffile.h 2
-rw-r--r-- src/include/storage/fd.h 6
14 files changed, 269 insertions, 154 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 03fc6741b3..3833688e95 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3510,16 +3510,22 @@ SELECT * FROM parent WHERE key = 2400;
<para>
The value is a list of names of tablespaces. When there is more than
one name in the list, <productname>PostgreSQL</> chooses a random
- member of the list each time a temporary object is to be created.
+ member of the list each time a temporary object is to be created;
+ except that within a transaction, successively created temporary
+ objects are placed in successive tablespaces from the list.
+ If any element of the list is an empty string,
+ <productname>PostgreSQL</> will automatically use the default
+ tablespace of the current database instead.
</para>
<para>
- If any element of the list is an empty string or does not match the
- name of any existing tablespace, <productname>PostgreSQL</> will
- automatically use the default tablespace of the current database
- instead. If a nondefault tablespace
- is specified, the user must have <literal>CREATE</> privilege
- for it, or creation attempts will fail.
+ When <varname>temp_tablespaces</> is set interactively, specifying a
+ nonexistent tablespace is an error, as is specifying a tablespace for
+ which the user does not have <literal>CREATE</> privilege. However,
+ when using a previously set value, nonexistent tablespaces are
+ ignored, as are tablespaces for which the user lacks
+ <literal>CREATE</> privilege. In particular, this rule applies when
+ using a value set in <filename>postgresql.conf</>.
</para>
<para>
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 8f77c85279..4dc25903f3 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -63,6 +63,7 @@
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
+#include "utils/memutils.h"
/* GUC variables */
@@ -72,7 +73,6 @@ char *temp_tablespaces = NULL;
static bool remove_tablespace_directories(Oid tablespaceoid, bool redo);
static void set_short_version(const char *path);
-static Oid getTempTablespace(void);
/*
@@ -921,9 +921,12 @@ GetDefaultTablespace(bool forTemp)
{
Oid result;
- /* The temp-table case is handled by getTempTablespace() */
+ /* The temp-table case is handled elsewhere */
if (forTemp)
- return getTempTablespace();
+ {
+ PrepareTempTablespaces();
+ return GetNextTempTableSpace();
+ }
/* Fast path for default_tablespace == "" */
if (default_tablespace == NULL || default_tablespace[0] == '\0')
@@ -958,7 +961,6 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source)
{
char *rawname;
List *namelist;
- ListCell *l;
/* Need a modifiable copy of string */
rawname = pstrdup(newval);
@@ -975,24 +977,79 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source)
/*
* If we aren't inside a transaction, we cannot do database access so
* cannot verify the individual names. Must accept the list on faith.
+ * Fortunately, there's then also no need to pass the data to fd.c.
*/
- if (source >= PGC_S_INTERACTIVE && IsTransactionState())
+ if (IsTransactionState())
{
+ /*
+ * If we error out below, or if we are called multiple times in one
+ * transaction, we'll leak a bit of TopTransactionContext memory.
+ * Doesn't seem worth worrying about.
+ */
+ Oid *tblSpcs;
+ int numSpcs;
+ ListCell *l;
+
+ tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
+ list_length(namelist) * sizeof(Oid));
+ numSpcs = 0;
foreach(l, namelist)
{
char *curname = (char *) lfirst(l);
+ Oid curoid;
+ AclResult aclresult;
/* Allow an empty string (signifying database default) */
if (curname[0] == '\0')
+ {
+ tblSpcs[numSpcs++] = InvalidOid;
continue;
+ }
/* Else verify that name is a valid tablespace name */
- if (get_tablespace_oid(curname) == InvalidOid)
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_OBJECT),
- errmsg("tablespace \"%s\" does not exist",
- curname)));
+ curoid = get_tablespace_oid(curname);
+ if (curoid == InvalidOid)
+ {
+ /*
+ * In an interactive SET command, we ereport for bad info.
+ * Otherwise, silently ignore any bad list elements.
+ */
+ if (source >= PGC_S_INTERACTIVE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablespace \"%s\" does not exist",
+ curname)));
+ continue;
+ }
+
+ /*
+ * Allow explicit specification of database's default tablespace
+ * in temp_tablespaces without triggering permissions checks.
+ */
+ if (curoid == MyDatabaseTableSpace)
+ {
+ tblSpcs[numSpcs++] = InvalidOid;
+ continue;
+ }
+
+ /* Check permissions similarly */
+ aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ {
+ if (source >= PGC_S_INTERACTIVE)
+ aclcheck_error(aclresult, ACL_KIND_TABLESPACE, curname);
+ continue;
+ }
+
+ tblSpcs[numSpcs++] = curoid;
}
+
+ /* If actively "doing it", give the new list to fd.c */
+ if (doit)
+ SetTempTablespaces(tblSpcs, numSpcs);
+ else
+ pfree(tblSpcs);
}
pfree(rawname);
@@ -1002,69 +1059,34 @@ assign_temp_tablespaces(const char *newval, bool doit, GucSource source)
}
/*
- * GetTempTablespace -- get the OID of the next temp tablespace to use
- *
- * May return InvalidOid to indicate "use the database's default tablespace".
+ * PrepareTempTablespaces -- prepare to use temp tablespaces
*
- * This is different from GetDefaultTablespace(true) in just two ways:
- * 1. We check privileges here instead of leaving it to the caller.
- * 2. It's safe to call this outside a transaction (we just return InvalidOid).
- * The transaction state check is used so that this can be called from
- * low-level places that might conceivably run outside a transaction.
+ * If we have not already done so in the current transaction, parse the
+ * temp_tablespaces GUC variable and tell fd.c which tablespace(s) to use
+ * for temp files.
*/
-Oid
-GetTempTablespace(void)
-{
- Oid result;
-
- /* Can't do catalog access unless within a transaction */
- if (!IsTransactionState())
- return InvalidOid;
-
- /* OK, select a temp tablespace */
- result = getTempTablespace();
-
- /* Check permissions except when using database's default */
- if (OidIsValid(result))
- {
- AclResult aclresult;
-
- aclresult = pg_tablespace_aclcheck(result, GetUserId(),
- ACL_CREATE);
- if (aclresult != ACLCHECK_OK)
- aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
- get_tablespace_name(result));
- }
-
- return result;
-}
-
-/*
- * getTempTablespace -- get the OID of the next temp tablespace to use
- *
- * This has exactly the API defined for GetDefaultTablespace(true),
- * in particular that caller is responsible for permissions checks.
- *
- * This exists to hide (and possibly optimize the use of) the
- * temp_tablespaces GUC variable.
- */
-static Oid
-getTempTablespace(void)
+void
+PrepareTempTablespaces(void)
{
- Oid result;
char *rawname;
List *namelist;
- int nnames;
- char *curname;
+ Oid *tblSpcs;
+ int numSpcs;
+ ListCell *l;
- if (temp_tablespaces == NULL)
- return InvalidOid;
+ /* No work if already done in current transaction */
+ if (TempTablespacesAreSet())
+ return;
/*
- * We re-parse the string on each call; this is a bit expensive, but
- * we don't expect this function will be called many times per query,
- * so it's probably not worth being tenser.
+ * Can't do catalog access unless within a transaction. This is just
+ * a safety check in case this function is called by low-level code that
+ * could conceivably execute outside a transaction. Note that in such
+ * a scenario, fd.c will fall back to using the current database's default
+ * tablespace, which should always be OK.
*/
+ if (!IsTransactionState())
+ return;
/* Need a modifiable copy of string */
rawname = pstrdup(temp_tablespaces);
@@ -1073,51 +1095,60 @@ getTempTablespace(void)
if (!SplitIdentifierString(rawname, ',', &namelist))
{
/* syntax error in name list */
+ SetTempTablespaces(NULL, 0);
pfree(rawname);
list_free(namelist);
- return InvalidOid;
+ return;
}
- nnames = list_length(namelist);
- /* Fast path for temp_tablespaces == "" */
- if (nnames == 0)
+ /* Store tablespace OIDs in an array in TopTransactionContext */
+ tblSpcs = (Oid *) MemoryContextAlloc(TopTransactionContext,
+ list_length(namelist) * sizeof(Oid));
+ numSpcs = 0;
+ foreach(l, namelist)
{
- pfree(rawname);
- list_free(namelist);
- return InvalidOid;
- }
+ char *curname = (char *) lfirst(l);
+ Oid curoid;
+ AclResult aclresult;
- /* Select a random element */
- if (nnames == 1) /* no need for a random() call */
- curname = (char *) linitial(namelist);
- else
- curname = (char *) list_nth(namelist, random() % nnames);
+ /* Allow an empty string (signifying database default) */
+ if (curname[0] == '\0')
+ {
+ tblSpcs[numSpcs++] = InvalidOid;
+ continue;
+ }
- /*
- * Empty string means "database's default", else look up the tablespace.
- *
- * It is tempting to cache this lookup for more speed, but then we would
- * fail to detect the case where the tablespace was dropped since the GUC
- * variable was set. Note also that we don't complain if the value fails
- * to refer to an existing tablespace; we just silently return InvalidOid,
- * causing the new object to be created in the database's tablespace.
- */
- if (curname[0] == '\0')
- result = InvalidOid;
- else
- result = get_tablespace_oid(curname);
+ /* Else verify that name is a valid tablespace name */
+ curoid = get_tablespace_oid(curname);
+ if (curoid == InvalidOid)
+ {
+ /* Silently ignore any bad list elements */
+ continue;
+ }
- /*
- * Allow explicit specification of database's default tablespace in
- * temp_tablespaces without triggering permissions checks.
- */
- if (result == MyDatabaseTableSpace)
- result = InvalidOid;
+ /*
+ * Allow explicit specification of database's default tablespace
+ * in temp_tablespaces without triggering permissions checks.
+ */
+ if (curoid == MyDatabaseTableSpace)
+ {
+ tblSpcs[numSpcs++] = InvalidOid;
+ continue;
+ }
+
+ /* Check permissions similarly */
+ aclresult = pg_tablespace_aclcheck(curoid, GetUserId(),
+ ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ continue;
+
+ tblSpcs[numSpcs++] = curoid;
+ }
+
+ SetTempTablespaces(tblSpcs, numSpcs);
pfree(rawname);
list_free(namelist);
-
- return result;
}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 94b3a824a0..acddb71870 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -267,7 +267,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
hashtable->totalTuples = 0;
hashtable->innerBatchFile = NULL;
hashtable->outerBatchFile = NULL;
- hashtable->hashTblSpc = InvalidOid;
hashtable->spaceUsed = 0;
hashtable->spaceAllowed = work_mem * 1024L;
@@ -327,8 +326,8 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
hashtable->outerBatchFile = (BufFile **)
palloc0(nbatch * sizeof(BufFile *));
/* The files will not be opened until needed... */
- /* ... but we want to choose the tablespace only once */
- hashtable->hashTblSpc = GetTempTablespace();
+ /* ... but make sure we have temp tablespaces established for them */
+ PrepareTempTablespaces();
}
/*
@@ -510,8 +509,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
palloc0(nbatch * sizeof(BufFile *));
hashtable->outerBatchFile = (BufFile **)
palloc0(nbatch * sizeof(BufFile *));
- /* time to choose the tablespace, too */
- hashtable->hashTblSpc = GetTempTablespace();
+ /* time to establish the temp tablespaces, too */
+ PrepareTempTablespaces();
}
else
{
@@ -564,8 +563,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
{
/* dump it out */
Assert(batchno > curbatch);
- ExecHashJoinSaveTuple(hashtable,
- HJTUPLE_MINTUPLE(tuple),
+ ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
tuple->hashvalue,
&hashtable->innerBatchFile[batchno]);
/* and remove from hash table */
@@ -657,8 +655,7 @@ ExecHashTableInsert(HashJoinTable hashtable,
* put the tuple into a temp file for later batches
*/
Assert(batchno > hashtable->curbatch);
- ExecHashJoinSaveTuple(hashtable,
- tuple,
+ ExecHashJoinSaveTuple(tuple,
hashvalue,
&hashtable->innerBatchFile[batchno]);
}
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 48d4a43bff..46a88fc052 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -223,8 +223,7 @@ ExecHashJoin(HashJoinState *node)
* in the corresponding outer-batch file.
*/
Assert(batchno > hashtable->curbatch);
- ExecHashJoinSaveTuple(hashtable,
- ExecFetchSlotMinimalTuple(outerTupleSlot),
+ ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue,
&hashtable->outerBatchFile[batchno]);
node->hj_NeedNewOuter = true;
@@ -755,8 +754,7 @@ start_over:
* will get messed up.
*/
void
-ExecHashJoinSaveTuple(HashJoinTable hashtable,
- MinimalTuple tuple, uint32 hashvalue,
+ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
BufFile **fileptr)
{
BufFile *file = *fileptr;
@@ -765,7 +763,7 @@ ExecHashJoinSaveTuple(HashJoinTable hashtable,
if (file == NULL)
{
/* First write to this batch file, so open it. */
- file = BufFileCreateTemp(false, hashtable->hashTblSpc);
+ file = BufFileCreateTemp(false);
*fileptr = file;
}
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index 9a772577c4..571f656cce 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -41,6 +41,8 @@
* The maximum safe file size is presumed to be RELSEG_SIZE * BLCKSZ.
* Note we adhere to this limit whether or not LET_OS_MANAGE_FILESIZE
* is defined, although md.c ignores it when that symbol is defined.
+ * The reason for doing this is that we'd like large temporary BufFiles
+ * to be spread across multiple tablespaces when available.
*/
#define MAX_PHYSICAL_FILESIZE (RELSEG_SIZE * BLCKSZ)
@@ -60,7 +62,6 @@ struct BufFile
* offsets[i] is the current seek position of files[i]. We use this to
* avoid making redundant FileSeek calls.
*/
- Oid tblspcOid; /* tablespace to use (InvalidOid = default) */
bool isTemp; /* can only add files if this is TRUE */
bool isInterXact; /* keep open over transactions? */
@@ -86,7 +87,7 @@ static int BufFileFlush(BufFile *file);
/*
* Create a BufFile given the first underlying physical file.
- * NOTE: caller must set tblspcOid, isTemp, isInterXact if appropriate.
+ * NOTE: caller must set isTemp and isInterXact if appropriate.
*/
static BufFile *
makeBufFile(File firstfile)
@@ -98,7 +99,6 @@ makeBufFile(File firstfile)
file->files[0] = firstfile;
file->offsets = (long *) palloc(sizeof(long));
file->offsets[0] = 0L;
- file->tblspcOid = InvalidOid;
file->isTemp = false;
file->isInterXact = false;
file->dirty = false;
@@ -119,7 +119,7 @@ extendBufFile(BufFile *file)
File pfile;
Assert(file->isTemp);
- pfile = OpenTemporaryFile(file->isInterXact, file->tblspcOid);
+ pfile = OpenTemporaryFile(file->isInterXact);
Assert(pfile >= 0);
file->files = (File *) repalloc(file->files,
@@ -137,23 +137,21 @@ extendBufFile(BufFile *file)
* written to it).
*
* If interXact is true, the temp file will not be automatically deleted
- * at end of transaction. If tblspcOid is not InvalidOid, the temp file
- * is created in the specified tablespace instead of the default one.
+ * at end of transaction.
*
* Note: if interXact is true, the caller had better be calling us in a
* memory context that will survive across transaction boundaries.
*/
BufFile *
-BufFileCreateTemp(bool interXact, Oid tblspcOid)
+BufFileCreateTemp(bool interXact)
{
BufFile *file;
File pfile;
- pfile = OpenTemporaryFile(interXact, tblspcOid);
+ pfile = OpenTemporaryFile(interXact);
Assert(pfile >= 0);
file = makeBufFile(pfile);
- file->tblspcOid = tblspcOid;
file->isTemp = true;
file->isInterXact = interXact;
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 22019db1ce..d6d6070cc7 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -185,6 +185,14 @@ static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS];
*/
static long tempFileCounter = 0;
+/*
+ * Array of OIDs of temp tablespaces. When numTempTableSpaces is -1,
+ * this has not been set in the current transaction.
+ */
+static Oid *tempTableSpaces = NULL;
+static int numTempTableSpaces = -1;
+static int nextTempTableSpace = 0;
+
/*--------------------
*
@@ -840,21 +848,28 @@ PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
* that created them, so this should be false -- but if you need
* "somewhat" temporary storage, this might be useful. In either case,
* the file is removed when the File is explicitly closed.
- *
- * tblspcOid: the Oid of the tablespace where the temp file should be created.
- * If InvalidOid, or if the tablespace can't be found, we silently fall back
- * to the database's default tablespace.
*/
File
-OpenTemporaryFile(bool interXact, Oid tblspcOid)
+OpenTemporaryFile(bool interXact)
{
File file = 0;
/*
- * If caller specified a tablespace, try to create there.
+ * If some temp tablespace(s) have been given to us, try to use the next
+ * one. If a given tablespace can't be found, we silently fall back
+ * to the database's default tablespace.
+ *
+ * BUT: if the temp file is slated to outlive the current transaction,
+ * force it into the database's default tablespace, so that it will
+ * not pose a threat to possible tablespace drop attempts.
*/
- if (OidIsValid(tblspcOid))
- file = OpenTemporaryFileInTablespace(tblspcOid, false);
+ if (numTempTableSpaces > 0 && !interXact)
+ {
+ Oid tblspcOid = GetNextTempTableSpace();
+
+ if (OidIsValid(tblspcOid))
+ file = OpenTemporaryFileInTablespace(tblspcOid, false);
+ }
/*
* If not, or if tablespace is bad, create in database's default
@@ -1530,6 +1545,69 @@ closeAllVfds(void)
}
}
+
+/*
+ * SetTempTablespaces
+ *
+ * Define a list (actually an array) of OIDs of tablespaces to use for
+ * temporary files. This list will be used until end of transaction,
+ * unless this function is called again before then. It is caller's
+ * responsibility that the passed-in array has adequate lifespan (typically
+ * it'd be allocated in TopTransactionContext).
+ */
+void
+SetTempTablespaces(Oid *tableSpaces, int numSpaces)
+{
+ Assert(numSpaces >= 0);
+ tempTableSpaces = tableSpaces;
+ numTempTableSpaces = numSpaces;
+ /*
+ * Select a random starting point in the list. This is to minimize
+ * conflicts between backends that are most likely sharing the same
+ * list of temp tablespaces. Note that if we create multiple temp
+ * files in the same transaction, we'll advance circularly through
+ * the list --- this ensures that large temporary sort files are
+ * nicely spread across all available tablespaces.
+ */
+ if (numSpaces > 1)
+ nextTempTableSpace = random() % numSpaces;
+ else
+ nextTempTableSpace = 0;
+}
+
+/*
+ * TempTablespacesAreSet
+ *
+ * Returns TRUE if SetTempTablespaces has been called in current transaction.
+ * (This is just so that tablespace.c doesn't need its own per-transaction
+ * state.)
+ */
+bool
+TempTablespacesAreSet(void)
+{
+ return (numTempTableSpaces >= 0);
+}
+
+/*
+ * GetNextTempTableSpace
+ *
+ * Select the next temp tablespace to use. A result of InvalidOid means
+ * to use the current database's default tablespace.
+ */
+Oid
+GetNextTempTableSpace(void)
+{
+ if (numTempTableSpaces > 0)
+ {
+ /* Advance nextTempTableSpace counter with wraparound */
+ if (++nextTempTableSpace >= numTempTableSpaces)
+ nextTempTableSpace = 0;
+ return tempTableSpaces[nextTempTableSpace];
+ }
+ return InvalidOid;
+}
+
+
/*
* AtEOSubXact_Files
*
@@ -1583,11 +1661,14 @@ AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
* particularly care which). All still-open per-transaction temporary file
* VFDs are closed, which also causes the underlying files to be
* deleted. Furthermore, all "allocated" stdio files are closed.
+ * We also forget any transaction-local temp tablespace list.
*/
void
AtEOXact_Files(void)
{
CleanupTempFiles(false);
+ tempTableSpaces = NULL;
+ numTempTableSpaces = -1;
}
/*
diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c
index ff8221a310..d6a4b555bb 100644
--- a/src/backend/utils/sort/logtape.c
+++ b/src/backend/utils/sort/logtape.c
@@ -77,7 +77,6 @@
#include "postgres.h"
-#include "commands/tablespace.h"
#include "storage/buffile.h"
#include "utils/logtape.h"
@@ -529,7 +528,7 @@ LogicalTapeSetCreate(int ntapes)
Assert(ntapes > 0);
lts = (LogicalTapeSet *) palloc(sizeof(LogicalTapeSet) +
(ntapes - 1) *sizeof(LogicalTape));
- lts->pfile = BufFileCreateTemp(false, GetTempTablespace());
+ lts->pfile = BufFileCreateTemp(false);
lts->nFileBlocks = 0L;
lts->forgetFreeSpace = false;
lts->blocksSorted = true; /* a zero-length array is sorted ... */
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 03273317ff..97cd532b71 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -104,6 +104,7 @@
#include "access/nbtree.h"
#include "catalog/pg_amop.h"
#include "catalog/pg_operator.h"
+#include "commands/tablespace.h"
#include "miscadmin.h"
#include "utils/datum.h"
#include "utils/logtape.h"
@@ -1480,6 +1481,12 @@ inittapes(Tuplesortstate *state)
USEMEM(state, tapeSpace);
/*
+ * Make sure that the temp file(s) underlying the tape set are created in
+ * suitable temp tablespaces.
+ */
+ PrepareTempTablespaces();
+
+ /*
* Create the tape set and allocate the per-tape data arrays.
*/
state->tapeset = LogicalTapeSetCreate(maxTapes);
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index 0ea2ba7e21..01c0bc8f35 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -424,15 +424,11 @@ tuplestore_puttuple_common(Tuplestorestate *state, void *tuple)
return;
/*
- * Nope; time to switch to tape-based operation.
- *
- * If the temp table is slated to outlive the current transaction,
- * force it into my database's default tablespace, so that it will
- * not pose a threat to possible tablespace drop attempts.
+ * Nope; time to switch to tape-based operation. Make sure that
+ * the temp file(s) are created in suitable temp tablespaces.
*/
- state->myfile = BufFileCreateTemp(state->interXact,
- state->interXact ? InvalidOid :
- GetTempTablespace());
+ PrepareTempTablespaces();
+ state->myfile = BufFileCreateTemp(state->interXact);
state->status = TSS_WRITEFILE;
dumptuples(state);
break;
diff --git a/src/include/commands/tablespace.h b/src/include/commands/tablespace.h
index 1f0245448e..2b12742b44 100644
--- a/src/include/commands/tablespace.h
+++ b/src/include/commands/tablespace.h
@@ -41,7 +41,8 @@ extern void AlterTableSpaceOwner(const char *name, Oid newOwnerId);
extern void TablespaceCreateDbspace(Oid spcNode, Oid dbNode, bool isRedo);
extern Oid GetDefaultTablespace(bool forTemp);
-extern Oid GetTempTablespace(void);
+
+extern void PrepareTempTablespaces(void);
extern Oid get_tablespace_oid(const char *tablespacename);
extern char *get_tablespace_name(Oid spc_oid);
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 01dd05d7b5..96ff10c94f 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -102,8 +102,6 @@ typedef struct HashJoinTableData
BufFile **innerBatchFile; /* buffered virtual temp file per batch */
BufFile **outerBatchFile; /* buffered virtual temp file per batch */
- Oid hashTblSpc; /* tablespace to put temp files in */
-
/*
* Info about the datatype-specific hash functions for the datatypes being
* hashed. These are arrays of the same length as the number of hash join
diff --git a/src/include/executor/nodeHashjoin.h b/src/include/executor/nodeHashjoin.h
index f457825b00..309566cdfb 100644
--- a/src/include/executor/nodeHashjoin.h
+++ b/src/include/executor/nodeHashjoin.h
@@ -23,8 +23,7 @@ extern TupleTableSlot *ExecHashJoin(HashJoinState *node);
extern void ExecEndHashJoin(HashJoinState *node);
extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt);
-extern void ExecHashJoinSaveTuple(HashJoinTable hashtable,
- MinimalTuple tuple, uint32 hashvalue,
- BufFile **fileptr);
+extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
+ BufFile **fileptr);
#endif /* NODEHASHJOIN_H */
diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h
index 76ba602e9f..fd4c782368 100644
--- a/src/include/storage/buffile.h
+++ b/src/include/storage/buffile.h
@@ -34,7 +34,7 @@ typedef struct BufFile BufFile;
* prototypes for functions in buffile.c
*/
-extern BufFile *BufFileCreateTemp(bool interXact, Oid tblspcOid);
+extern BufFile *BufFileCreateTemp(bool interXact);
extern void BufFileClose(BufFile *file);
extern size_t BufFileRead(BufFile *file, void *ptr, size_t size);
extern size_t BufFileWrite(BufFile *file, void *ptr, size_t size);
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 6a8847db17..c033301053 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -60,7 +60,7 @@ extern int max_files_per_process;
/* Operations on virtual Files --- equivalent to Unix kernel file ops */
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
-extern File OpenTemporaryFile(bool interXact, Oid tblspcOid);
+extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
extern void FileUnlink(File file);
extern int FileRead(File file, char *buffer, int amount);
@@ -85,10 +85,14 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
extern void InitFileAccess(void);
extern void set_max_safe_fds(void);
extern void closeAllVfds(void);
+extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces);
+extern bool TempTablespacesAreSet(void);
+extern Oid GetNextTempTableSpace(void);
extern void AtEOXact_Files(void);
extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
SubTransactionId parentSubid);
extern void RemovePgTempFiles(void);
+
extern int pg_fsync(int fd);
extern int pg_fsync_no_writethrough(int fd);
extern int pg_fsync_writethrough(int fd);