*** pgsql/src/backend/commands/tablespace.c 2010/01/07 04:10:39 1.70 --- pgsql/src/backend/commands/tablespace.c 2010/01/12 02:42:51 1.71 *************** *** 15,22 **** * To support file access via the information given in RelFileNode, we * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are * named by tablespace OIDs and point to the actual tablespace directories. * Thus the full path to an arbitrary file is ! * $PGDATA/pg_tblspc/spcoid/dboid/relfilenode * * There are two tablespaces created at initdb time: pg_global (for shared * tables) and pg_default (for everything else). For backwards compatibility --- 15,25 ---- * To support file access via the information given in RelFileNode, we * maintain a symbolic-link map in $PGDATA/pg_tblspc. The symlinks are * named by tablespace OIDs and point to the actual tablespace directories. + * There is also a per-cluster version directory in each tablespace. * Thus the full path to an arbitrary file is ! * $PGDATA/pg_tblspc/spcoid/PG_MAJORVER_CATVER/dboid/relfilenode ! * e.g. ! * $PGDATA/pg_tblspc/20981/PG_8.5_201001061/719849/83292814 * * There are two tablespaces created at initdb time: pg_global (for shared * tables) and pg_default (for everything else). For backwards compatibility *************** *** 37,43 **** * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.69 2010/01/07 04:05:39 momjian Exp $ * *------------------------------------------------------------------------- */ --- 40,46 ---- * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.70 2010/01/07 04:10:39 momjian Exp $ * *------------------------------------------------------------------------- */ *************** char *default_tablespace = NULL; *** 81,88 **** char *temp_tablespaces = NULL; ! static bool remove_tablespace_directories(Oid tablespaceoid, bool redo); ! static void write_version_file(const char *path); /* --- 84,92 ---- char *temp_tablespaces = NULL; ! static void create_tablespace_directories(const char *location, ! const Oid tablespaceoid); ! static bool destroy_tablespace_directories(Oid tablespaceoid, bool redo); /* *************** TablespaceCreateDbspace(Oid spcNode, Oid *** 146,163 **** { char *parentdir; ! /* Failure other than not exists? */ if (errno != ENOENT || !isRedo) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dir))); ! /* Parent directory must be missing */ parentdir = pstrdup(dir); get_parent_directory(parentdir); ! /* Can't create parent either? */ ! if (mkdir(parentdir, S_IRWXU) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", --- 150,185 ---- { char *parentdir; ! /* Failure other than not exists or not in WAL replay? */ if (errno != ENOENT || !isRedo) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", dir))); ! /* ! * Parent directories are missing during WAL replay, so ! * continue by creating simple parent directories ! * rather than a symlink. ! */ ! ! /* create two parents up if not exist */ parentdir = pstrdup(dir); get_parent_directory(parentdir); ! get_parent_directory(parentdir); ! /* Can't create parent and it doesn't already exist? */ ! if (mkdir(parentdir, S_IRWXU) < 0 && errno != EEXIST) ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not create directory \"%s\": %m", ! parentdir))); ! pfree(parentdir); ! ! /* create one parent up if not exist */ ! parentdir = pstrdup(dir); ! get_parent_directory(parentdir); ! /* Can't create parent and it doesn't already exist? */ ! if (mkdir(parentdir, S_IRWXU) < 0 && errno != EEXIST) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create directory \"%s\": %m", *************** CreateTableSpace(CreateTableSpaceStmt *s *** 212,218 **** HeapTuple tuple; Oid tablespaceoid; char *location; - char *linkloc; Oid ownerId; /* Must be super user */ --- 234,239 ---- *************** CreateTableSpace(CreateTableSpaceStmt *s *** 251,260 **** /* * Check that location isn't too long. Remember that we're going to append ! * '//.' (XXX but do we ever form the whole path ! * explicitly? This may be overly conservative.) */ ! if (strlen(location) >= MAXPGPATH - 1 - OIDCHARS - 1 - OIDCHARS - 1 - OIDCHARS) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location \"%s\" is too long", --- 272,282 ---- /* * Check that location isn't too long. Remember that we're going to append ! * 'PG_XXX//.'. FYI, we never actually reference the ! * whole path, but mkdir() uses the first two parts. */ ! if (strlen(location) + 1 + strlen(TABLESPACE_VERSION_DIRECTORY) + 1 + ! OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS > MAXPGPATH) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location \"%s\" is too long", *************** CreateTableSpace(CreateTableSpaceStmt *s *** 311,355 **** /* Record dependency on owner */ recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId); ! /* ! * Attempt to coerce target directory to safe permissions. If this fails, ! * it doesn't exist or has the wrong owner. ! */ ! if (chmod(location, 0700) != 0) ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not set permissions on directory \"%s\": %m", ! location))); ! ! /* ! * Check the target directory is empty. ! */ ! if (!directory_is_empty(location)) ! ereport(ERROR, ! (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), ! errmsg("directory \"%s\" is not empty", ! location))); ! ! /* ! * Create the PG_VERSION file in the target directory. This has several ! * purposes: to make sure we can write in the directory, to prevent ! * someone from creating another tablespace pointing at the same directory ! * (the emptiness check above will fail), and to label tablespace ! * directories by PG version. ! */ ! write_version_file(location); ! ! /* ! * All seems well, create the symlink ! */ ! linkloc = (char *) palloc(OIDCHARS + OIDCHARS + 1); ! sprintf(linkloc, "pg_tblspc/%u", tablespaceoid); ! ! if (symlink(location, linkloc) < 0) ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not create symbolic link \"%s\": %m", ! linkloc))); /* Record the filesystem change in XLOG */ { --- 333,339 ---- /* Record dependency on owner */ recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId); ! create_tablespace_directories(location, tablespaceoid); /* Record the filesystem change in XLOG */ { *************** CreateTableSpace(CreateTableSpaceStmt *s *** 378,384 **** */ ForceSyncCommit(); - pfree(linkloc); pfree(location); /* We keep the lock on pg_tablespace until commit */ --- 362,367 ---- *************** DropTableSpace(DropTableSpaceStmt *stmt) *** 478,484 **** /* * Try to remove the physical infrastructure. */ ! if (!remove_tablespace_directories(tablespaceoid, false)) { /* * Not all files deleted? However, there can be lingering empty files --- 461,467 ---- /* * Try to remove the physical infrastructure. */ ! if (!destroy_tablespace_directories(tablespaceoid, false)) { /* * Not all files deleted? However, there can be lingering empty files *************** DropTableSpace(DropTableSpaceStmt *stmt) *** 490,496 **** * out any lingering files, and try again. */ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); ! if (!remove_tablespace_directories(tablespaceoid, false)) { /* Still not empty, the files must be important then */ ereport(ERROR, --- 473,479 ---- * out any lingering files, and try again. */ RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); ! if (!destroy_tablespace_directories(tablespaceoid, false)) { /* Still not empty, the files must be important then */ ereport(ERROR, *************** DropTableSpace(DropTableSpaceStmt *stmt) *** 542,565 **** #endif /* HAVE_SYMLINK */ } /* ! * remove_tablespace_directories: attempt to remove filesystem infrastructure * ! * Returns TRUE if successful, FALSE if some subdirectory is not empty * ! * redo indicates we are redoing a drop from XLOG; okay if nothing there */ static bool ! remove_tablespace_directories(Oid tablespaceoid, bool redo) { ! char *location; DIR *dirdesc; struct dirent *de; char *subfile; struct stat st; ! location = (char *) palloc(OIDCHARS + OIDCHARS + 1); ! sprintf(location, "pg_tblspc/%u", tablespaceoid); /* * Check if the tablespace still contains any files. We try to rmdir each --- 525,621 ---- #endif /* HAVE_SYMLINK */ } + /* ! * create_tablespace_directories * ! * Attempt to create filesystem infrastructure linking $PGDATA/pg_tblspc/ ! * to the specified directory ! */ ! static void ! create_tablespace_directories(const char *location, const Oid tablespaceoid) ! { ! char *linkloc = palloc(OIDCHARS + OIDCHARS + 1); ! char *location_with_version_dir = palloc(strlen(location) + 1 + ! strlen(TABLESPACE_VERSION_DIRECTORY) + 1); ! ! sprintf(linkloc, "pg_tblspc/%u", tablespaceoid); ! sprintf(location_with_version_dir, "%s/%s", location, ! TABLESPACE_VERSION_DIRECTORY); ! ! /* ! * Attempt to coerce target directory to safe permissions. If this fails, ! * it doesn't exist or has the wrong owner. ! */ ! if (chmod(location, 0700) != 0) ! { ! if (errno == ENOENT) ! ereport(ERROR, ! (errcode(ERRCODE_UNDEFINED_FILE), ! errmsg("directory \"%s\" does not exist", ! location))); ! else ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not set permissions on directory \"%s\": %m", ! location))); ! } ! ! /* ! * The creation of the version directory prevents more than one ! * tablespace in a single location. ! */ ! if (mkdir(location_with_version_dir, S_IRWXU) < 0) ! { ! if (errno == EEXIST) ! ereport(ERROR, ! (errcode(ERRCODE_OBJECT_IN_USE), ! errmsg("directory \"%s\" already in use as a tablespace", ! location_with_version_dir))); ! else ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not create directory \"%s\": %m", ! location_with_version_dir))); ! } ! ! /* ! * Create the symlink under PGDATA ! */ ! if (symlink(location, linkloc) < 0) ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not create symbolic link \"%s\": %m", ! linkloc))); ! ! pfree(linkloc); ! pfree(location_with_version_dir); ! } ! ! ! /* ! * destroy_tablespace_directories ! * ! * Attempt to remove filesystem infrastructure ! * ! * 'redo' indicates we are redoing a drop from XLOG; okay if nothing there * ! * Returns TRUE if successful, FALSE if some subdirectory is not empty */ static bool ! destroy_tablespace_directories(Oid tablespaceoid, bool redo) { ! char *linkloc; ! char *linkloc_with_version_dir; DIR *dirdesc; struct dirent *de; char *subfile; struct stat st; ! linkloc_with_version_dir = palloc(9 + 1 + OIDCHARS + 1 + ! strlen(TABLESPACE_VERSION_DIRECTORY)); ! sprintf(linkloc_with_version_dir, "pg_tblspc/%u/%s", tablespaceoid, ! TABLESPACE_VERSION_DIRECTORY); /* * Check if the tablespace still contains any files. We try to rmdir each *************** remove_tablespace_directories(Oid tables *** 582,588 **** * and symlink. We want to allow a new DROP attempt to succeed at * removing the catalog entries, so we should not give a hard error here. */ ! dirdesc = AllocateDir(location); if (dirdesc == NULL) { if (errno == ENOENT) --- 638,644 ---- * and symlink. We want to allow a new DROP attempt to succeed at * removing the catalog entries, so we should not give a hard error here. */ ! dirdesc = AllocateDir(linkloc_with_version_dir); if (dirdesc == NULL) { if (errno == ENOENT) *************** remove_tablespace_directories(Oid tables *** 591,622 **** ereport(WARNING, (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", ! location))); ! pfree(location); return true; } /* else let ReadDir report the error */ } ! while ((de = ReadDir(dirdesc, location)) != NULL) { - /* Note we ignore PG_VERSION for the nonce */ if (strcmp(de->d_name, ".") == 0 || ! strcmp(de->d_name, "..") == 0 || ! strcmp(de->d_name, "PG_VERSION") == 0) continue; ! subfile = palloc(strlen(location) + 1 + strlen(de->d_name) + 1); ! sprintf(subfile, "%s/%s", location, de->d_name); /* This check is just to deliver a friendlier error message */ if (!directory_is_empty(subfile)) { FreeDir(dirdesc); return false; } ! /* Do the real deed */ if (rmdir(subfile) < 0) ereport(ERROR, (errcode_for_file_access(), --- 647,678 ---- ereport(WARNING, (errcode_for_file_access(), errmsg("could not open directory \"%s\": %m", ! linkloc_with_version_dir))); ! pfree(linkloc_with_version_dir); return true; } /* else let ReadDir report the error */ } ! while ((de = ReadDir(dirdesc, linkloc_with_version_dir)) != NULL) { if (strcmp(de->d_name, ".") == 0 || ! strcmp(de->d_name, "..") == 0) continue; ! subfile = palloc(strlen(linkloc_with_version_dir) + 1 + strlen(de->d_name) + 1); ! sprintf(subfile, "%s/%s", linkloc_with_version_dir, de->d_name); /* This check is just to deliver a friendlier error message */ if (!directory_is_empty(subfile)) { FreeDir(dirdesc); + pfree(subfile); + pfree(linkloc_with_version_dir); return false; } ! /* remove empty directory */ if (rmdir(subfile) < 0) ereport(ERROR, (errcode_for_file_access(), *************** remove_tablespace_directories(Oid tables *** 628,706 **** FreeDir(dirdesc); /* ! * Okay, try to unlink PG_VERSION (we allow it to not be there, even in ! * non-REDO case, for robustness). ! */ ! subfile = palloc(strlen(location) + 11 + 1); ! sprintf(subfile, "%s/PG_VERSION", location); ! ! if (unlink(subfile) < 0) ! { ! if (errno != ENOENT) ! ereport(ERROR, ! (errcode_for_file_access(), ! errmsg("could not remove file \"%s\": %m", ! subfile))); ! } ! ! pfree(subfile); ! ! /* ! * Okay, try to remove the symlink. We must however deal with the * possibility that it's a directory instead of a symlink --- this could * happen during WAL replay (see TablespaceCreateDbspace), and it is also ! * the normal case on Windows. */ ! if (lstat(location, &st) == 0 && S_ISDIR(st.st_mode)) { ! if (rmdir(location) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", ! location))); } else { ! if (unlink(location) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", ! location))); } ! pfree(location); return true; } - /* - * write out the PG_VERSION file in the specified directory - */ - static void - write_version_file(const char *path) - { - char *fullname; - FILE *version_file; - - /* Now write the file */ - fullname = palloc(strlen(path) + 11 + 1); - sprintf(fullname, "%s/PG_VERSION", path); - - if ((version_file = AllocateFile(fullname, PG_BINARY_W)) == NULL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", - fullname))); - fprintf(version_file, "%s\n", PG_MAJORVERSION); - if (FreeFile(version_file)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", - fullname))); - - pfree(fullname); - } /* * Check if a directory is empty. --- 684,727 ---- FreeDir(dirdesc); + /* remove version directory */ + if (rmdir(linkloc_with_version_dir) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove directory \"%s\": %m", + linkloc_with_version_dir))); + /* ! * Try to remove the symlink. We must however deal with the * possibility that it's a directory instead of a symlink --- this could * happen during WAL replay (see TablespaceCreateDbspace), and it is also ! * the case on Windows where junction points lstat() as directories. */ ! linkloc = pstrdup(linkloc_with_version_dir); ! get_parent_directory(linkloc); ! if (lstat(linkloc, &st) == 0 && S_ISDIR(st.st_mode)) { ! if (rmdir(linkloc) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove directory \"%s\": %m", ! linkloc))); } else { ! if (unlink(linkloc) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove symbolic link \"%s\": %m", ! linkloc))); } ! pfree(linkloc_with_version_dir); ! pfree(linkloc); return true; } /* * Check if a directory is empty. *************** directory_is_empty(const char *path) *** 728,733 **** --- 749,755 ---- return true; } + /* * Rename a tablespace */ *************** tblspc_redo(XLogRecPtr lsn, XLogRecord * *** 1336,1370 **** { xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record); char *location = xlrec->ts_path; - char *linkloc; - - /* - * Attempt to coerce target directory to safe permissions. If this - * fails, it doesn't exist or has the wrong owner. - */ - if (chmod(location, 0700) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not set permissions on directory \"%s\": %m", - location))); - - /* Create or re-create the PG_VERSION file in the target directory */ - write_version_file(location); - - /* Create the symlink if not already present */ - linkloc = (char *) palloc(OIDCHARS + OIDCHARS + 1); - sprintf(linkloc, "pg_tblspc/%u", xlrec->ts_id); - - if (symlink(location, linkloc) < 0) - { - if (errno != EEXIST) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create symbolic link \"%s\": %m", - linkloc))); - } ! pfree(linkloc); } else if (info == XLOG_TBLSPC_DROP) { --- 1358,1365 ---- { xl_tblspc_create_rec *xlrec = (xl_tblspc_create_rec *) XLogRecGetData(record); char *location = xlrec->ts_path; ! create_tablespace_directories(location, xlrec->ts_id); } else if (info == XLOG_TBLSPC_DROP) { *************** tblspc_redo(XLogRecPtr lsn, XLogRecord * *** 1380,1386 **** * remove all files then do conflict processing and try again, * if currently enabled. */ ! if (!remove_tablespace_directories(xlrec->ts_id, true)) { VirtualTransactionId *temp_file_users; --- 1375,1381 ---- * remove all files then do conflict processing and try again, * if currently enabled. */ ! if (!destroy_tablespace_directories(xlrec->ts_id, true)) { VirtualTransactionId *temp_file_users; *************** tblspc_redo(XLogRecPtr lsn, XLogRecord * *** 1416,1422 **** * exited by now. So lets recheck before we throw an error. * If !process_conflicts then this will just fail again. */ ! if (!remove_tablespace_directories(xlrec->ts_id, true)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace %u is not empty", --- 1411,1417 ---- * exited by now. So lets recheck before we throw an error. * If !process_conflicts then this will just fail again. */ ! if (!destroy_tablespace_directories(xlrec->ts_id, true)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace %u is not empty",