diff options
Diffstat (limited to 'src/bin/pg_dump/compress_io.c')
-rw-r--r-- | src/bin/pg_dump/compress_io.c | 741 |
1 files changed, 121 insertions, 620 deletions
diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 5ac21f091f0..c2eb3dbb4a4 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -9,42 +9,51 @@ * * This file includes two APIs for dealing with compressed data. The first * provides more flexibility, using callbacks to read/write data from the - * underlying stream. The second API is a wrapper around fopen/gzopen and + * underlying stream. The second API is a wrapper around fopen and * friends, providing an interface similar to those, but abstracts away - * the possible compression. Both APIs use libz for the compression, but - * the second API uses gzip headers, so the resulting files can be easily - * manipulated with the gzip utility. + * the possible compression. The second API is aimed for the resulting + * files to be easily manipulated with an external compression utility + * program. * * Compressor API * -------------- * * The interface for writing to an archive consists of three functions: - * AllocateCompressor, WriteDataToArchive and EndCompressor. First you call - * AllocateCompressor, then write all the data by calling WriteDataToArchive - * as many times as needed, and finally EndCompressor. WriteDataToArchive - * and EndCompressor will call the WriteFunc that was provided to - * AllocateCompressor for each chunk of compressed data. + * AllocateCompressor, writeData, and EndCompressor. First you call + * AllocateCompressor, then write all the data by calling writeData as many + * times as needed, and finally EndCompressor. writeData will call the + * WriteFunc that was provided to AllocateCompressor for each chunk of + * compressed data. * - * The interface for reading an archive consists of just one function: - * ReadDataFromArchive. ReadDataFromArchive reads the whole compressed input - * stream, by repeatedly calling the given ReadFunc. ReadFunc returns the - * compressed data chunk at a time, and ReadDataFromArchive decompresses it - * and passes the decompressed data to ahwrite(), until ReadFunc returns 0 - * to signal EOF. - * - * The interface is the same for compressed and uncompressed streams. + * The interface for reading an archive consists of the same three functions: + * AllocateCompressor, readData, and EndCompressor. First you call + * AllocateCompressor, then read all the data by calling readData to read the + * whole compressed stream which repeatedly calls the given ReadFunc. ReadFunc + * returns the compressed data one chunk at a time. Then readData decompresses + * it and passes the decompressed data to ahwrite(), until ReadFunc returns 0 + * to signal EOF. The interface is the same for compressed and uncompressed + * streams. * * Compressed stream API * ---------------------- * - * The compressed stream API is a wrapper around the C standard fopen() and - * libz's gzopen() APIs. It allows you to use the same functions for - * compressed and uncompressed streams. cfopen_read() first tries to open - * the file with given name, and if it fails, it tries to open the same - * file with the .gz suffix. cfopen_write() opens a file for writing, an - * extra argument specifies if the file should be compressed, and adds the - * .gz suffix to the filename if so. This allows you to easily handle both - * compressed and uncompressed files. + * The compressed stream API is providing a set of function pointers for + * opening, reading, writing, and finally closing files. The implemented + * function pointers are documented in the corresponding header file and are + * common for all streams. It allows the caller to use the same functions for + * both compressed and uncompressed streams. + * + * The interface consists of three functions, InitCompressFileHandle, + * InitDiscoverCompressFileHandle, and EndCompressFileHandle. If the + * compression is known, then start by calling InitCompressFileHandle, + * otherwise discover it by using InitDiscoverCompressFileHandle. Then call + * the function pointers as required for the read/write operations. Finally + * call EndCompressFileHandle to end the stream. + * + * InitDiscoverCompressFileHandle tries to infer the compression by the + * filename suffix. If the suffix is not yet known then it tries to simply + * open the file and if it fails, it tries to open the same file with the .gz + * suffix. * * IDENTIFICATION * src/bin/pg_dump/compress_io.c @@ -53,13 +62,14 @@ */ #include "postgres_fe.h" +#include <sys/stat.h> +#include <unistd.h> + +#include "compress_gzip.h" #include "compress_io.h" +#include "compress_none.h" #include "pg_backup_utils.h" -#ifdef HAVE_LIBZ -#include <zlib.h> -#endif - /*---------------------- * Generic functions *---------------------- @@ -96,110 +106,25 @@ supports_compression(const pg_compress_specification compression_spec) *---------------------- */ -/* typedef appears in compress_io.h */ -struct CompressorState -{ - pg_compress_specification compression_spec; - WriteFunc writeF; - -#ifdef HAVE_LIBZ - z_streamp zp; - char *zlibOut; - size_t zlibOutSize; -#endif -}; - -/* Routines that support zlib compressed data I/O */ -#ifdef HAVE_LIBZ -static void InitCompressorZlib(CompressorState *cs, int level); -static void DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, - bool flush); -static void ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF); -static void WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs, - const char *data, size_t dLen); -static void EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs); -#endif - -/* Routines that support uncompressed data I/O */ -static void ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF); -static void WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs, - const char *data, size_t dLen); - -/* Public interface routines */ - -/* Allocate a new compressor */ +/* + * Allocate a new compressor. + */ CompressorState * AllocateCompressor(const pg_compress_specification compression_spec, - WriteFunc writeF) + ReadFunc readF, WriteFunc writeF) { CompressorState *cs; -#ifndef HAVE_LIBZ - if (compression_spec.algorithm == PG_COMPRESSION_GZIP) - pg_fatal("this build does not support compression with %s", "gzip"); -#endif - cs = (CompressorState *) pg_malloc0(sizeof(CompressorState)); + cs->readF = readF; cs->writeF = writeF; - cs->compression_spec = compression_spec; - - /* - * Perform compression algorithm specific initialization. - */ -#ifdef HAVE_LIBZ - if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP) - InitCompressorZlib(cs, cs->compression_spec.level); -#endif - - return cs; -} -/* - * Read all compressed data from the input stream (via readF) and print it - * out with ahwrite(). - */ -void -ReadDataFromArchive(ArchiveHandle *AH, - const pg_compress_specification compression_spec, - ReadFunc readF) -{ if (compression_spec.algorithm == PG_COMPRESSION_NONE) - ReadDataFromArchiveNone(AH, readF); - if (compression_spec.algorithm == PG_COMPRESSION_GZIP) - { -#ifdef HAVE_LIBZ - ReadDataFromArchiveZlib(AH, readF); -#else - pg_fatal("this build does not support compression with %s", "gzip"); -#endif - } -} + InitCompressorNone(cs, compression_spec); + else if (compression_spec.algorithm == PG_COMPRESSION_GZIP) + InitCompressorGzip(cs, compression_spec); -/* - * Compress and write data to the output stream (via writeF). - */ -void -WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs, - const void *data, size_t dLen) -{ - switch (cs->compression_spec.algorithm) - { - case PG_COMPRESSION_GZIP: -#ifdef HAVE_LIBZ - WriteDataToArchiveZlib(AH, cs, data, dLen); -#else - pg_fatal("this build does not support compression with %s", "gzip"); -#endif - break; - case PG_COMPRESSION_NONE: - WriteDataToArchiveNone(AH, cs, data, dLen); - break; - case PG_COMPRESSION_LZ4: - /* fallthrough */ - case PG_COMPRESSION_ZSTD: - pg_fatal("invalid compression method"); - break; - } + return cs; } /* @@ -208,233 +133,31 @@ WriteDataToArchive(ArchiveHandle *AH, CompressorState *cs, void EndCompressor(ArchiveHandle *AH, CompressorState *cs) { -#ifdef HAVE_LIBZ - if (cs->compression_spec.algorithm == PG_COMPRESSION_GZIP) - EndCompressorZlib(AH, cs); -#endif - free(cs); -} - -/* Private routines, specific to each compression method. */ - -#ifdef HAVE_LIBZ -/* - * Functions for zlib compressed output. - */ - -static void -InitCompressorZlib(CompressorState *cs, int level) -{ - z_streamp zp; - - zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); - zp->zalloc = Z_NULL; - zp->zfree = Z_NULL; - zp->opaque = Z_NULL; - - /* - * zlibOutSize is the buffer size we tell zlib it can output to. We - * actually allocate one extra byte because some routines want to append a - * trailing zero byte to the zlib output. - */ - cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1); - cs->zlibOutSize = ZLIB_OUT_SIZE; - - if (deflateInit(zp, level) != Z_OK) - pg_fatal("could not initialize compression library: %s", - zp->msg); - - /* Just be paranoid - maybe End is called after Start, with no Write */ - zp->next_out = (void *) cs->zlibOut; - zp->avail_out = cs->zlibOutSize; -} - -static void -EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs) -{ - z_streamp zp = cs->zp; - - zp->next_in = NULL; - zp->avail_in = 0; - - /* Flush any remaining data from zlib buffer */ - DeflateCompressorZlib(AH, cs, true); - - if (deflateEnd(zp) != Z_OK) - pg_fatal("could not close compression stream: %s", zp->msg); - - free(cs->zlibOut); - free(cs->zp); -} - -static void -DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush) -{ - z_streamp zp = cs->zp; - char *out = cs->zlibOut; - int res = Z_OK; - - while (cs->zp->avail_in != 0 || flush) - { - res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH); - if (res == Z_STREAM_ERROR) - pg_fatal("could not compress data: %s", zp->msg); - if ((flush && (zp->avail_out < cs->zlibOutSize)) - || (zp->avail_out == 0) - || (zp->avail_in != 0) - ) - { - /* - * Extra paranoia: avoid zero-length chunks, since a zero length - * chunk is the EOF marker in the custom format. This should never - * happen but... - */ - if (zp->avail_out < cs->zlibOutSize) - { - /* - * Any write function should do its own error checking but to - * make sure we do a check here as well... - */ - size_t len = cs->zlibOutSize - zp->avail_out; - - cs->writeF(AH, out, len); - } - zp->next_out = (void *) out; - zp->avail_out = cs->zlibOutSize; - } - - if (res == Z_STREAM_END) - break; - } -} - -static void -WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs, - const char *data, size_t dLen) -{ - cs->zp->next_in = (void *) unconstify(char *, data); - cs->zp->avail_in = dLen; - DeflateCompressorZlib(AH, cs, false); -} - -static void -ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF) -{ - z_streamp zp; - char *out; - int res = Z_OK; - size_t cnt; - char *buf; - size_t buflen; - - zp = (z_streamp) pg_malloc(sizeof(z_stream)); - zp->zalloc = Z_NULL; - zp->zfree = Z_NULL; - zp->opaque = Z_NULL; - - buf = pg_malloc(ZLIB_IN_SIZE); - buflen = ZLIB_IN_SIZE; - - out = pg_malloc(ZLIB_OUT_SIZE + 1); - - if (inflateInit(zp) != Z_OK) - pg_fatal("could not initialize compression library: %s", - zp->msg); - - /* no minimal chunk size for zlib */ - while ((cnt = readF(AH, &buf, &buflen))) - { - zp->next_in = (void *) buf; - zp->avail_in = cnt; - - while (zp->avail_in > 0) - { - zp->next_out = (void *) out; - zp->avail_out = ZLIB_OUT_SIZE; - - res = inflate(zp, 0); - if (res != Z_OK && res != Z_STREAM_END) - pg_fatal("could not uncompress data: %s", zp->msg); - - out[ZLIB_OUT_SIZE - zp->avail_out] = '\0'; - ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH); - } - } - - zp->next_in = NULL; - zp->avail_in = 0; - while (res != Z_STREAM_END) - { - zp->next_out = (void *) out; - zp->avail_out = ZLIB_OUT_SIZE; - res = inflate(zp, 0); - if (res != Z_OK && res != Z_STREAM_END) - pg_fatal("could not uncompress data: %s", zp->msg); - - out[ZLIB_OUT_SIZE - zp->avail_out] = '\0'; - ahwrite(out, 1, ZLIB_OUT_SIZE - zp->avail_out, AH); - } - - if (inflateEnd(zp) != Z_OK) - pg_fatal("could not close compression library: %s", zp->msg); - - free(buf); - free(out); - free(zp); -} -#endif /* HAVE_LIBZ */ - - -/* - * Functions for uncompressed output. - */ - -static void -ReadDataFromArchiveNone(ArchiveHandle *AH, ReadFunc readF) -{ - size_t cnt; - char *buf; - size_t buflen; - - buf = pg_malloc(ZLIB_OUT_SIZE); - buflen = ZLIB_OUT_SIZE; - - while ((cnt = readF(AH, &buf, &buflen))) - { - ahwrite(buf, 1, cnt, AH); - } - - free(buf); -} - -static void -WriteDataToArchiveNone(ArchiveHandle *AH, CompressorState *cs, - const char *data, size_t dLen) -{ - cs->writeF(AH, data, dLen); + cs->end(AH, cs); + pg_free(cs); } - /*---------------------- * Compressed stream API *---------------------- */ /* - * cfp represents an open stream, wrapping the underlying FILE or gzFile - * pointer. This is opaque to the callers. + * Private routines */ -struct cfp +static int +hasSuffix(const char *filename, const char *suffix) { - FILE *uncompressedfp; -#ifdef HAVE_LIBZ - gzFile compressedfp; -#endif -}; + int filenamelen = strlen(filename); + int suffixlen = strlen(suffix); -#ifdef HAVE_LIBZ -static int hasSuffix(const char *filename, const char *suffix); -#endif + if (filenamelen < suffixlen) + return 0; + + return memcmp(&filename[filenamelen - suffixlen], + suffix, + suffixlen) == 0; +} /* free() without changing errno; useful in several places below */ static void @@ -447,324 +170,102 @@ free_keep_errno(void *p) } /* - * Open a file for reading. 'path' is the file to open, and 'mode' should - * be either "r" or "rb". - * - * If the file at 'path' does not exist, we append the ".gz" suffix (if 'path' - * doesn't already have it) and try again. So if you pass "foo" as 'path', - * this will open either "foo" or "foo.gz". - * - * On failure, return NULL with an error code in errno. - */ -cfp * -cfopen_read(const char *path, const char *mode) -{ - cfp *fp; - - pg_compress_specification compression_spec = {0}; - -#ifdef HAVE_LIBZ - if (hasSuffix(path, ".gz")) - { - compression_spec.algorithm = PG_COMPRESSION_GZIP; - fp = cfopen(path, mode, compression_spec); - } - else -#endif - { - compression_spec.algorithm = PG_COMPRESSION_NONE; - fp = cfopen(path, mode, compression_spec); -#ifdef HAVE_LIBZ - if (fp == NULL) - { - char *fname; - - fname = psprintf("%s.gz", path); - compression_spec.algorithm = PG_COMPRESSION_GZIP; - fp = cfopen(fname, mode, compression_spec); - free_keep_errno(fname); - } -#endif - } - return fp; -} - -/* - * Open a file for writing. 'path' indicates the path name, and 'mode' must - * be a filemode as accepted by fopen() and gzopen() that indicates writing - * ("w", "wb", "a", or "ab"). - * - * If 'compression_spec.algorithm' is GZIP, a gzip compressed stream is opened, - * and 'compression_spec.level' used. The ".gz" suffix is automatically added to - * 'path' in that case. - * - * On failure, return NULL with an error code in errno. + * Public interface */ -cfp * -cfopen_write(const char *path, const char *mode, - const pg_compress_specification compression_spec) -{ - cfp *fp; - - if (compression_spec.algorithm == PG_COMPRESSION_NONE) - fp = cfopen(path, mode, compression_spec); - else - { -#ifdef HAVE_LIBZ - char *fname; - - fname = psprintf("%s.gz", path); - fp = cfopen(fname, mode, compression_spec); - free_keep_errno(fname); -#else - pg_fatal("this build does not support compression with %s", "gzip"); - fp = NULL; /* keep compiler quiet */ -#endif - } - return fp; -} /* - * This is the workhorse for cfopen() or cfdopen(). It opens file 'path' or - * associates a stream 'fd', if 'fd' is a valid descriptor, in 'mode'. The - * descriptor is not dup'ed and it is the caller's responsibility to do so. - * The caller must verify that the 'compress_algorithm' is supported by the - * current build. - * - * On failure, return NULL with an error code in errno. + * Initialize a compress file handle for the specified compression algorithm. */ -static cfp * -cfopen_internal(const char *path, int fd, const char *mode, - pg_compress_specification compression_spec) +CompressFileHandle * +InitCompressFileHandle(const pg_compress_specification compression_spec) { - cfp *fp = pg_malloc0(sizeof(cfp)); - - if (compression_spec.algorithm == PG_COMPRESSION_GZIP) - { -#ifdef HAVE_LIBZ - if (compression_spec.level != Z_DEFAULT_COMPRESSION) - { - /* user has specified a compression level, so tell zlib to use it */ - char mode_compression[32]; - - snprintf(mode_compression, sizeof(mode_compression), "%s%d", - mode, compression_spec.level); - if (fd >= 0) - fp->compressedfp = gzdopen(fd, mode_compression); - else - fp->compressedfp = gzopen(path, mode_compression); - } - else - { - /* don't specify a level, just use the zlib default */ - if (fd >= 0) - fp->compressedfp = gzdopen(fd, mode); - else - fp->compressedfp = gzopen(path, mode); - } + CompressFileHandle *CFH; - if (fp->compressedfp == NULL) - { - free_keep_errno(fp); - fp = NULL; - } -#else - pg_fatal("this build does not support compression with %s", "gzip"); -#endif - } - else - { - if (fd >= 0) - fp->uncompressedfp = fdopen(fd, mode); - else - fp->uncompressedfp = fopen(path, mode); + CFH = pg_malloc0(sizeof(CompressFileHandle)); - if (fp->uncompressedfp == NULL) - { - free_keep_errno(fp); - fp = NULL; - } - } + if (compression_spec.algorithm == PG_COMPRESSION_NONE) + InitCompressFileHandleNone(CFH, compression_spec); + else if (compression_spec.algorithm == PG_COMPRESSION_GZIP) + InitCompressFileHandleGzip(CFH, compression_spec); - return fp; + return CFH; } /* - * Opens file 'path' in 'mode' and compression as defined in - * compression_spec. The caller must verify that the compression - * is supported by the current build. + * Open a file for reading. 'path' is the file to open, and 'mode' should + * be either "r" or "rb". * - * On failure, return NULL with an error code in errno. - */ -cfp * -cfopen(const char *path, const char *mode, - const pg_compress_specification compression_spec) -{ - return cfopen_internal(path, -1, mode, compression_spec); -} - -/* - * Associates a stream 'fd', if 'fd' is a valid descriptor, in 'mode' - * and compression as defined in compression_spec. The caller must - * verify that the compression is supported by the current build. + * If the file at 'path' contains the suffix of a supported compression method, + * currently this includes only ".gz", then this compression will be used + * throughout. Otherwise the compression will be inferred by iteratively trying + * to open the file at 'path', first as is, then by appending known compression + * suffixes. So if you pass "foo" as 'path', this will open either "foo" or + * "foo.gz", trying in that order. * * On failure, return NULL with an error code in errno. */ -cfp * -cfdopen(int fd, const char *mode, - const pg_compress_specification compression_spec) +CompressFileHandle * +InitDiscoverCompressFileHandle(const char *path, const char *mode) { - return cfopen_internal(NULL, fd, mode, compression_spec); -} + CompressFileHandle *CFH = NULL; + struct stat st; + char *fname; + pg_compress_specification compression_spec = {0}; -int -cfread(void *ptr, int size, cfp *fp) -{ - int ret; + compression_spec.algorithm = PG_COMPRESSION_NONE; - if (size == 0) - return 0; + Assert(strcmp(mode, PG_BINARY_R) == 0); -#ifdef HAVE_LIBZ - if (fp->compressedfp) - { - ret = gzread(fp->compressedfp, ptr, size); - if (ret != size && !gzeof(fp->compressedfp)) - { - int errnum; - const char *errmsg = gzerror(fp->compressedfp, &errnum); + fname = strdup(path); - pg_fatal("could not read from input file: %s", - errnum == Z_ERRNO ? strerror(errno) : errmsg); - } - } + if (hasSuffix(fname, ".gz")) + compression_spec.algorithm = PG_COMPRESSION_GZIP; else -#endif { - ret = fread(ptr, 1, size, fp->uncompressedfp); - if (ret != size && !feof(fp->uncompressedfp)) - READ_ERROR_EXIT(fp->uncompressedfp); - } - return ret; -} - -int -cfwrite(const void *ptr, int size, cfp *fp) -{ -#ifdef HAVE_LIBZ - if (fp->compressedfp) - return gzwrite(fp->compressedfp, ptr, size); - else -#endif - return fwrite(ptr, 1, size, fp->uncompressedfp); -} - -int -cfgetc(cfp *fp) -{ - int ret; + bool exists; + exists = (stat(path, &st) == 0); + /* avoid unused warning if it is not built with compression */ + if (exists) + compression_spec.algorithm = PG_COMPRESSION_NONE; #ifdef HAVE_LIBZ - if (fp->compressedfp) - { - ret = gzgetc(fp->compressedfp); - if (ret == EOF) + if (!exists) { - if (!gzeof(fp->compressedfp)) - pg_fatal("could not read from input file: %s", strerror(errno)); - else - pg_fatal("could not read from input file: end of file"); + free_keep_errno(fname); + fname = psprintf("%s.gz", path); + exists = (stat(fname, &st) == 0); + + if (exists) + compression_spec.algorithm = PG_COMPRESSION_GZIP; } - } - else #endif - { - ret = fgetc(fp->uncompressedfp); - if (ret == EOF) - READ_ERROR_EXIT(fp->uncompressedfp); } - return ret; -} - -char * -cfgets(cfp *fp, char *buf, int len) -{ -#ifdef HAVE_LIBZ - if (fp->compressedfp) - return gzgets(fp->compressedfp, buf, len); - else -#endif - return fgets(buf, len, fp->uncompressedfp); -} - -int -cfclose(cfp *fp) -{ - int result; - - if (fp == NULL) - { - errno = EBADF; - return EOF; - } -#ifdef HAVE_LIBZ - if (fp->compressedfp) + CFH = InitCompressFileHandle(compression_spec); + if (CFH->open_func(fname, -1, mode, CFH)) { - result = gzclose(fp->compressedfp); - fp->compressedfp = NULL; + free_keep_errno(CFH); + CFH = NULL; } - else -#endif - { - result = fclose(fp->uncompressedfp); - fp->uncompressedfp = NULL; - } - free_keep_errno(fp); + free_keep_errno(fname); - return result; + return CFH; } +/* + * Close an open file handle and release its memory. + * + * On failure, returns an error value and sets errno appropriately. + */ int -cfeof(cfp *fp) +EndCompressFileHandle(CompressFileHandle *CFH) { -#ifdef HAVE_LIBZ - if (fp->compressedfp) - return gzeof(fp->compressedfp); - else -#endif - return feof(fp->uncompressedfp); -} + int ret = 0; -const char * -get_cfp_error(cfp *fp) -{ -#ifdef HAVE_LIBZ - if (fp->compressedfp) - { - int errnum; - const char *errmsg = gzerror(fp->compressedfp, &errnum); + if (CFH->private_data) + ret = CFH->close_func(CFH); - if (errnum != Z_ERRNO) - return errmsg; - } -#endif - return strerror(errno); -} + free_keep_errno(CFH); -#ifdef HAVE_LIBZ -static int -hasSuffix(const char *filename, const char *suffix) -{ - int filenamelen = strlen(filename); - int suffixlen = strlen(suffix); - - if (filenamelen < suffixlen) - return 0; - - return memcmp(&filename[filenamelen - suffixlen], - suffix, - suffixlen) == 0; + return ret; } - -#endif |